From: Zack Cerza Date: Wed, 28 Mar 2018 22:43:47 +0000 (-0600) Subject: ceph-graphite: Skip irrelevant integration tests X-Git-Tag: v2.0~37^2~9 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=fe42c35544ee97fc12b3c81e5dff773ad8a42836;p=cephmetrics.git ceph-graphite: Skip irrelevant integration tests Signed-off-by: Zack Cerza --- fe42c35544ee97fc12b3c81e5dff773ad8a42836 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a7079d9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,110 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# vim swap files +*.swp + +# Python virtualenv +virtualenv/ + +# ansible retry files +*.retry diff --git a/INSTALL.md b/INSTALL.md new file mode 100644 index 0000000..4ca0c09 --- /dev/null +++ b/INSTALL.md 
@@ -0,0 +1,125 @@ +# Installation Process + +## Objective: +Run a Grafana instance to provide a monitoring dashboard to a ceph +cluster. + +## Pre-requisites +### Monitoring host +- docker and docker-compose (for simplicity) +- grafana image (official latest 4.3 release from docker hub) +- graphite image (docker.io/abezhenar/graphite-centos7) +- clone the cephmetrics repo (docker configuration, dashboards) +- host that will run the monitor should have passwordless ssh to all the ceph +nodes +- the storage for the graphite database should be on SSD/flash if possible +- needs PyYAML, tested with python 2.7.13 + +### Ceph Cluster Nodes +- collectd rpm (5.7 or above) + +## Installation Sequence +Install the monitoring endpoint first, and then apply the collectd configuration +to each of the ceph nodes. + + +## Setting Up the monitoring endpoint +On the monitoring host, perform the following steps; +1. Pull the required docker images (*listed above*) +2. we need to persist the grafana configuration db and settings, as well as the +graphite data. +```markdown +mkdir -p /opt/docker/grafana/etc +mkdir -p /opt/docker/grafana/data/plugins +mkdir -p /opt/docker/graphite +``` +3. Download the additional status panel plugin +```markdown +cd /opt/docker/grafana/data/plugins +wget https://grafana.com/api/plugins/vonage-status-panel/versions/1.0.4/download +unzip download +rm -f download +``` +4. Copy the seed .ini file for grafana to the containers etc directory, and reset +the permissions to be compatible with the containers +```markdown +cp etc/grafana/grafana.ini /opt/docker/grafana/etc +chown -R 104:107 /opt/docker/grafana +chown -R 997 /opt/docker/graphite +chmod g+w /opt/docker/graphite + +``` +5. Edit the docker-compose.yml example (if necessary) +6. From the directory with the compose file, issue +``` +docker-compose up -d +``` +7. 
check that the containers are running and the endpoints are listening +7.1 Use ```docker ps``` +7.2 use ```netstat``` and look for the following ports: 3000,80,2003,2004,7002 +7.3 open a browser and connect to graphite - it should be running on port 80 of +the local machine +8. Add the graphite instance as a datasource to grafana +8.1 update setup/add_datasource.json with the IP of the host machine +8.2 register the graphite instance to grafana as the default data source +```markdown +curl -u admin:admin -H "Content-Type: application/json" -X POST http://localhost:3000/api/datasources \ +--data-binary @setup/add_datasource.json +``` +9. Install the grafana labs pie-chart plugin +9.1 open a shell session to the grafana instance, and install the plugin +```markdown +docker exec -it grafana bash +grafana-cli plugins install grafana-piechart-panel +``` +10. the sample dashboards need to be added/edited to reflect the ceph cluster to +monitor +10.1 seed dashboards are provided in the dashboards/current directory +10.2 edit ```dashboard.yml``` with the shortnames of the OSD's and RGW's, plus +the dns domain name of the environment. +10.3 run the following command +```markdown +python dashUpdater.py +``` + + +### Updating the dashboards +After adding ceph nodes to the configuration, update the ```dashboard.yml``` +file, and then rerun the ```dashUpdater.py``` script. + + +## Configuration on Each Ceph Node +You may need to update your SELINUX policy to allow the write_graphite plugin +to access outbound on port 2003. To test, simply disable SELINUX +1. install collectd (this will also require libcollectdclient) +2. create the required directories for the cephmetrics collectors (see known +issues [2]) +```markdown +mkdir -p /usr/lib64/collectd/python-plugins/collectors +``` +3. copy the collectors to the directory created in [2], and cephmetrics.py +to /usr/lib64/collectd/python-plugins +4. 
Setup the collectd plugins +4.1 Update the write_graphite.conf file to specify the hostname where the +grafana/graphite environment is (use a hostname not IP - anecdotally I found that +with an IP the plugin fails to connect to the graphite container port?) +4.2 copy the example plugin files to the /etc/collectd.d directory (i.e. cpu.conf, +memory.conf etc) +5. update the "ClusterName" parameter in the cephmetrics plugin file to match + the name of your ceph cluster (default is 'ceph') +6. copy the example collectd.conf file to the ceph node (or update the existing +configuration to ensure there is a ```Include "/etc/collectd.d/*.conf"``` entry) +7. enable collectd +8. start collectd +9. check collectd is running without errors + +## Known Issues +1. Following a reboot of an OSD node, the cephmetrics collectd plugin doesn't send disk +stats. ***Workaround**: Following the reboot of an OSD, restart the collectd service.* +2. the cephmetrics.py and collectors should be installed through python-setuptools to cut down on +the installation steps. +3. SELINUX may block the write_graphite plugin writing outbound on port 2003 + + + + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..65c5ca8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. 
+ + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/README b/README new file mode 100644 index 0000000..b6fd253 --- /dev/null +++ b/README @@ -0,0 +1,58 @@ +On the ceph node install collectd +- tested with collectd-5.7.0-4.el7ost.x86_64 + +Interval is set to 10 in collectd.conf + +write_graphite plugin configured as follows; + +LoadPlugin "write_graphite" + + + Host "192.168.1.52" + Port "2003" + Protocol "tcp" + LogSendErrors true + Prefix "collectd." + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + PreserveSeparator true + SeparateInstances true + + + +5.7 introduces the PreserveSeparator parm, allowing the type instance name to +contain '.'. At the moment I used .'s in the metric name since the same plugin +provides all metrics. + +Comments welcome! + +This is what an entry looks like as seen in influx +collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._rgw_root.num_bytes_recovered + | | \ \ \ \ + | | \ \ \ \ + | | | \ \ \ +prefix | hostname | plugin name | type |ceph|metric| metric name + name group + +In the case of pools, the metric name is prefixed by the pool name + +Added cephmetrics.conf to /etc/collectd.d dir + +mkdir -p /usr/lib64/collectd/python-plugins + +to the plugin dir, I copied + - cephmetrics.py + - collectors dir + + +Known Issues +1. Singlestat panels don't track the graph panel values 100% of the time + + + + +Container Configuration - pulled from docker.io registry +- grafana - grafana/grafana (official image) +- graphite - nickstenning/graphite - https://hub.docker.com/r/nickstenning/graphite/ + diff --git a/README.md b/README.md new file mode 100644 index 0000000..efe3413 --- /dev/null +++ b/README.md @@ -0,0 +1,82 @@ +# cephmetrics + +Cephmetrics is a tool that allows a user to visually monitor various metrics in a running Ceph cluster. + +## Prerequisites +- RHEL 7 should be running on all hosts +- A functional ceph cluster running version ceph-osd-10.2.7-27.el7cp.x86_64 or later is already up and running. 
+- Another host machine independent of the ceph machines must be available. This host will be used to receive data pushed by the hosts in the Ceph cluster, and will run the dashboard to display that data. +- A host machine on which to execute `ansible-playbook` to orchestrate the deployment must be available. +- Passwordless SSH access from the deploy host to the ceph hosts. The username should be the same for all hosts. +- Passwordless sudo access on the ceph and dashboard hosts +- All hosts must share the same DNS domain + +## Resulting configuration + +After running this procedure, you will have the following configuration. +- The ceph nodes will have `collectd` installed, along with collector plugins from `cephmetrics-collectd` +- The dashboard host will have `grafana` installed and configured to display various dashboards by querying data received from Ceph nodes via a `graphite-web`, `python-carbon`, and `python-whisper` stack. + +## Installation + +### Install cephmetrics-ansible + +First, decide which machine you want to use to run `ansible-playbook`. If you used [`ceph-ansible`](https://github.com/ceph/ceph-ansible) to set up your cluster, you may want to reuse that same host to take advantage of the inventory file that was created as part of that process. + +Once the host is selected, perform the following steps there. This will install a repo which includes the cephmetrics installation code and ansible (version 2.2.3 or later): +``` +sudo su - +mkdir ~/cephmetrics +subscription-manager repos --enable rhel-7-server-optional-rpms --enable rhel-7-server-rhscon-2-installer-rpms +curl -L -o /etc/yum.repos.d/cephmetrics.repo http://download.ceph.com/cephmetrics/rpm-master/el7/cephmetrics.repo +yum install cephmetrics-ansible +``` + +### Create or edit the inventory file + +Next, we need an inventory file. 
If you are running `ansible-playbook` on a host that previously ran `ceph-ansible`, you may simply modify `/etc/ansible/hosts`; otherwise you may copy `/usr/share/cephmetrics-ansible/inventory.sample` and modify it if you wish. + +The inventory file format looks like: + + [ceph-grafana] + grafana_host.example.com + + [osds] + osd0.example.com + osd1.example.com + osd2.example.com + + [mons] + mon0.example.com + mon1.example.com + mon2.example.com + + [mdss] + mds0.example.com + + [rgws] + rgw0.example.com + +If you are running `ansible-playbook` on a host mentioned in the inventory file, you will need to append `ansible_connection=local` to each line in the inventory file that mentions that host. An example: + ``` + my_host.example.com ansible_connection=local + ``` +Omit the mdss section if no ceph mds nodes are installed. Omit the rgws section if no rgw nodes are installed. + +Ansible variables can be set in a `vars.yml` file if necessary. If it is required, make sure to add `-e '@/path/to/vars.yml'` to your `ansible-playbook` invocation below. [Click here](./ansible/README.md) for more information. + +## Deploy via ansible-playbook + +If you are using a `ceph-ansible` host, run these commands: +``` +cd /usr/share/cephmetrics-ansible +ansible-playbook -v playbook.yml +``` + +Otherwise, run these commands: +``` +cd /usr/share/cephmetrics-ansible +ansible-playbook -v -i /path/to/inventory playbook.yml +``` + +Note: The reason it is necessary to change directories is so that `ansible-playbook` will use the bundled `ansible.cfg`; there is currently no command-line argument allowing the specification of an arbitrary `.cfg` file. 
diff --git a/TODO b/TODO new file mode 100644 index 0000000..cc52402 --- /dev/null +++ b/TODO @@ -0,0 +1,25 @@ + +collectd +- add network and cpu to all deployments +- define standard easy roll-out conf (use collectd.d for write_graphite, cephmetrics, cpu and network) + +Dashboard + + +collectd : cephmetrics +- add metrics starting message so you know collection is active + +Python Modules +Mon +- + +RGW +- + + + +Completed Items +05/24 RGW: implement the latencies as different metrics to allow summarisation at the db layer +05/24 MON: add ceph health status (OK/WARN) to output dict +05/24 Dashboard: Add ceph health text +05/29 OSD: OSD metrics added, with dashboard updates \ No newline at end of file diff --git a/ansible/README.md b/ansible/README.md new file mode 100644 index 0000000..a923fda --- /dev/null +++ b/ansible/README.md @@ -0,0 +1,90 @@ +# Deploying cephmetrics with ansible + +This set of ansible roles, in combination with `playbook.yml`, provide a way to deploy cephmetrics to monitor a Ceph cluster. + +## Prerequisites +- RHEL 7 is supported with `devel_mode` set to `True` or `False`. Ubuntu 16.04 and CentOS 7 are supported only when `devel_mode` is `True` at this point. +- Currently only RHEL 7 is supported for all hosts +- A functional [ceph](https://ceph.com/) cluster. [collectd](https://collectd.org/) will be used to collect metrics +- A separate host to receive data pushed by hosts in the Ceph cluster, and run the dashboard to display that data. +- An inventory file describing your cluster. +- A host on which to execute `ansible-playbook` to orchestrate the deployment. This can be the same as the dashboard host. +- Passwordless SSH access from the deploy host to the ceph hosts. The username should be the same for all hosts. 
+- Passwordless sudo access on the ceph and dashboard hosts +- All hosts must share the same DNS domain + +## Example inventory file + + [ceph-grafana] + cephmetrics.example.com + + [osds] + osd0.example.com + osd1.example.com + osd3.example.com + + [mons] + mon0.example.com + mon1.example.com + mon2.example.com + + [mdss] + mds0.example.com + + [rgws] + rgw0.example.com + +Notes: +- Omit any sections from the inventory file for which your cluster has no hosts. +- If you are running `ansible-playbook` directly on the dashboard (`ceph-grafana`) host, its inventory entry should look like: + ``` + [ceph-grafana] + cephmetrics.example.com ansible_connection=local + ``` + +## Roles +- [ceph-collectd](./roles/ceph-collectd/): Used for ceph cluster hosts +- [ceph-graphite](./roles/ceph-graphite/): Used for the dashboard host +- [ceph-grafana](./roles/ceph-grafana/): Used for the dashboard host + +## Variables +You may override certain variables by creating a `vars.yml` file: +- `ansible_ssh_user`: The user account to use for SSH connections. This may also be set on a per-host basis in the inventory file. +- `cluster`: The name of the Ceph cluster. Default: ceph +- `firewalld_zone`: The `firewalld` zone to use when opening ports for Grafana and Carbon. Default: public +- `devel_mode`: Whether to perform a development-mode deployment vs. a production deployment. Default: true +- `whisper`: May be used to configure [whisper retention](http://graphite.readthedocs.io/en/latest/config-carbon.html#storage-schemas-conf) settings. Default: + ``` + whisper: + retention: + - ['10s', '7d'] + - ['1m', '30d'] + - ['15m', '5y'] + ``` +- `update_alerts`: Whether to update the alerts dashboard along with the rest. Removes any user-defined alerts. Default: false +- `custom_repos`: A list of custom package repositories to enable. Currently supports yum systems only. 
Format: + ``` + custom_repos: + yum: + - name: my_repo + baseurl: http://example.com/my/repo + ``` + +These variables are only relevant when `devel_mode` is true: +- `use_epel`: Whether or not to use EPEL and grafana.com instead of ceph.com-sourced packages for dependencies. Default: false + +## Current Limitations + +- Currently, metrics are only *displayed* for `osd` and `rgw` hosts. +- Authentication for grafana and graphite is fixed and creates a user `admin` with password `admin`. +- Services are deployed on the dashboard host directly; there is not yet support for a containerized deployment. + +## Usage +If you are not overriding any variables: +``` + ansible-playbook -v -i ./inventory playbook.yml +``` +Or, if you are: +``` + ansible-playbook -v -i ./inventory -e '@vars.yml' playbook.yml +``` diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg new file mode 100644 index 0000000..2f4d621 --- /dev/null +++ b/ansible/ansible.cfg @@ -0,0 +1,7 @@ +[defaults] +roles_path: ./roles/ +host_key_checking: False +forks: 50 + +[ssh_connection] +pipelining=True diff --git a/ansible/inventory.sample b/ansible/inventory.sample new file mode 100644 index 0000000..d3e8301 --- /dev/null +++ b/ansible/inventory.sample @@ -0,0 +1,18 @@ +[ceph-grafana] +grafana_host.example.com + +[osds] +osd0.example.com +osd1.example.com +osd2.example.com + +[mons] +mon0.example.com +mon1.example.com +mon2.example.com + +[mdss] +mds0.example.com + +[rgws] +rgw0.example.com diff --git a/ansible/playbook.yml b/ansible/playbook.yml new file mode 100644 index 0000000..5cdbf63 --- /dev/null +++ b/ansible/playbook.yml @@ -0,0 +1,78 @@ +--- +- hosts: all + gather_facts: true + any_errors_fatal: true + tags: + - always + +- hosts: + - mgrs + become: true + roles: + - ceph-mgr + +- hosts: + - ceph-grafana + become: true + roles: + - ceph-prometheus + tags: + - prometheus + +- hosts: + - ceph-grafana + become: true + roles: + - ceph-graphite + +- hosts: + - ceph-grafana + become: true + roles: + - ceph-grafana + tags: + - 
grafana + +- hosts: + # These are roles used by ceph-ansible + - mons + - agents + - osds + - mdss + - rgws + - nfss + - restapis + - rbdmirrors + - clients + - mgrs + # This role is (so far) only used for testing + - cluster + become: true + roles: + - ceph-node-exporter + +- hosts: + # These are roles used by ceph-ansible + - mons + - agents + - osds + - mdss + - rgws + - nfss + - restapis + - rbdmirrors + - clients + - mgrs + # This role is (so far) only used for testing + - cluster + become: true + roles: + - ceph-collectd + +- hosts: localhost + connection: local + gather_facts: false + tasks: + - name: Print dashboard URL + debug: + msg: "All done! You may access your dashboard at http://{{ groups['ceph-grafana'][0] }}:3000/ with user 'admin' and password 'admin'. Note that initially the dashboard will be incomplete; after a few minutes it should have enough data collected to function properly." diff --git a/ansible/purge.yml b/ansible/purge.yml new file mode 100644 index 0000000..f640505 --- /dev/null +++ b/ansible/purge.yml @@ -0,0 +1,90 @@ +--- +- name: purge grafana host + hosts: + - ceph-grafana + become: true + tasks: + - name: Stop and disable services + service: + name: "{{ item }}" + enabled: no + state: stopped + with_items: + - grafana-server + - carbon-cache + - httpd + failed_when: false + + - name: Remove packages + package: + name: "{{ item }}" + state: absent + with_items: + - graphite-web + - python-carbon + - grafana + - cephmetrics + + - name: Remove files + file: + dest: "{{ item }}" + state: absent + with_items: + - /var/lib/graphite + - /var/lib/graphite-web + - /var/lib/grafana + - /var/lib/carbon + - /etc/grafana/grafana.ini + - /etc/carbon/storage-schemas.conf + - /etc/httpd/conf.d/graphite-web.conf + - /etc/yum.repos.d/cephmetrics.repo + - /etc/yum.repos.d/grafana.repo + - /tmp/dashboard.yml + - /tmp/dashUpdater.py + - /tmp/dashboards + +- name: purge collectd hosts + hosts: + # These are roles used by ceph-ansible + - mons + - 
agents + - osds + - mdss + - rgws + - nfss + - restapis + - rbdmirrors + - clients + - mgrs + # This role is (so far) only used for testing + - cluster + become: true + tasks: + - name: Stop and disable collectd + service: + name: collectd + enabled: no + state: stopped + failed_when: false + + - name: Remove packages + package: + name: "{{ item }}" + state: absent + with_items: + - cephmetrics-collectors + - collectd + + - name: Remove files + file: + dest: "{{ item }}" + state: absent + with_items: + - /etc/collectd.d/cephmetrics.conf + - /etc/collectd.d/cpu.conf + - /etc/collectd.d/memory.conf + - /etc/collectd.d/nics.conf + - /etc/collectd.d/write_graphite.conf + - /etc/collectd.conf + - /etc/yum.repos.d/cephmetrics.repo + - /usr/lib64/collectd diff --git a/ansible/roles/ceph-collectd/defaults/main.yml b/ansible/roles/ceph-collectd/defaults/main.yml new file mode 100644 index 0000000..eb64af1 --- /dev/null +++ b/ansible/roles/ceph-collectd/defaults/main.yml @@ -0,0 +1,23 @@ +--- +defaults: + cluster: ceph + collector_dependencies: + yum: + # For the json python module + - python-libs + # For the rados python module + - python-rados + # For the ceph_daemon python module + - ceph-common + # For semodule + - make + - libsemanage-python + - policycoreutils-python + - selinux-policy-devel + apt: + # For the json module, via libpython2.7-stdlib + - python2.7 + # For the rados python module + - python-rados + # For the ceph_daemon python module + - ceph-common diff --git a/ansible/roles/ceph-collectd/files/cephmetrics.py b/ansible/roles/ceph-collectd/files/cephmetrics.py new file mode 120000 index 0000000..8de2567 --- /dev/null +++ b/ansible/roles/ceph-collectd/files/cephmetrics.py @@ -0,0 +1 @@ +../../../../cephmetrics.py \ No newline at end of file diff --git a/ansible/roles/ceph-collectd/files/cephmetrics.te b/ansible/roles/ceph-collectd/files/cephmetrics.te new file mode 120000 index 0000000..4d66f82 --- /dev/null +++ 
b/ansible/roles/ceph-collectd/files/cephmetrics.te @@ -0,0 +1 @@ +../../../../selinux/cephmetrics.te \ No newline at end of file diff --git a/ansible/roles/ceph-collectd/files/collectors b/ansible/roles/ceph-collectd/files/collectors new file mode 120000 index 0000000..5efd446 --- /dev/null +++ b/ansible/roles/ceph-collectd/files/collectors @@ -0,0 +1 @@ +../../../../collectors \ No newline at end of file diff --git a/ansible/roles/ceph-collectd/files/etc b/ansible/roles/ceph-collectd/files/etc new file mode 120000 index 0000000..5221ea5 --- /dev/null +++ b/ansible/roles/ceph-collectd/files/etc @@ -0,0 +1 @@ +../../../../etc \ No newline at end of file diff --git a/ansible/roles/ceph-collectd/handlers/main.yml b/ansible/roles/ceph-collectd/handlers/main.yml new file mode 100644 index 0000000..bd989ee --- /dev/null +++ b/ansible/roles/ceph-collectd/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: Restart collectd + service: + name: collectd + enabled: yes + state: restarted diff --git a/ansible/roles/ceph-collectd/meta/main.yml b/ansible/roles/ceph-collectd/meta/main.yml new file mode 100644 index 0000000..56dd016 --- /dev/null +++ b/ansible/roles/ceph-collectd/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: cephmetrics-common diff --git a/ansible/roles/ceph-collectd/tasks/configure_collectd.yml b/ansible/roles/ceph-collectd/tasks/configure_collectd.yml new file mode 100644 index 0000000..9a295db --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/configure_collectd.yml @@ -0,0 +1,55 @@ +--- +- name: Ship collectd.conf + copy: + src: files/etc/collectd.conf + dest: "{{ collectd_conf }}" + notify: Restart collectd + +- name: Set PluginDir in collectd.conf + replace: + dest: "{{ collectd_conf }}" + regexp: 'PluginDir ".*"' + replace: 'PluginDir "{{ collectd_dir }}"' + notify: Restart collectd + +- name: Set Include path in collectd.conf + replace: + dest: "{{ collectd_conf }}" + regexp: 'Include ".*"' + replace: 'Include "{{ collectd_conf_d }}/*.conf"' + 
notify: Restart collectd + +- name: Ship /etc/collectd.d or /etc/collectd/collectd.conf.d + copy: + src: files/etc/collectd.d/ + dest: "{{ collectd_conf_d }}" + notify: Restart collectd + +- name: Set hostname in write_graphite.conf + replace: + dest: "{{ collectd_conf_d }}/write_graphite.conf" + regexp: 'Host ".*"' + replace: "Host \"{{ groups['ceph-grafana'][0] }}\"" + notify: Restart collectd + +- name: Set cluster name in cephmetrics.conf + replace: + dest: "{{ collectd_conf_d }}/cephmetrics.conf" + regexp: 'ClusterName ".*"' + replace: 'ClusterName "{{ cluster }}"' + notify: Restart collectd + +- name: Set EventURL in cephmetrics.conf + lineinfile: + dest: "{{ collectd_conf_d }}/cephmetrics.conf" + regexp: 'EventURL ".*"' + insertafter: 'ClusterName ".*"' + line: " EventURL \"http://{{ groups['ceph-grafana'][0] }}:{{ graphite.web_port if graphite.service == 'graphite-web' else graphite.api_port }}/events/\"" + notify: Restart collectd + +- name: Set ModulePath in cephmetrics.conf + replace: + dest: "{{ collectd_conf_d }}/cephmetrics.conf" + regexp: 'ModulePath ".*"' + replace: 'ModulePath "{{ collectd_cephmetrics_dir }}"' + notify: Restart collectd diff --git a/ansible/roles/ceph-collectd/tasks/install_collectd_plugins.yml b/ansible/roles/ceph-collectd/tasks/install_collectd_plugins.yml new file mode 100644 index 0000000..8b4e61b --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/install_collectd_plugins.yml @@ -0,0 +1,17 @@ +--- +- name: Create collectors directory + file: + name: "{{ collectd_cephmetrics_dir }}/collectors" + state: directory + +- name: Ship collector plugins + copy: + src: files/collectors/ + dest: "{{ collectd_cephmetrics_dir }}/collectors" + notify: Restart collectd + +- name: Ship cephmetrics.py + copy: + src: files/cephmetrics.py + dest: "{{ collectd_cephmetrics_dir }}" + notify: Restart collectd diff --git a/ansible/roles/ceph-collectd/tasks/install_packages.yml b/ansible/roles/ceph-collectd/tasks/install_packages.yml new file mode 
100644 index 0000000..b290ac3 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/install_packages.yml @@ -0,0 +1,36 @@ +--- +- name: Install collectd + package: + name: collectd + state: latest + when: + - devel_mode + notify: Restart collectd + +- name: Install collectd-python + package: + name: collectd-python + state: latest + when: + - ansible_pkg_mgr == "yum" + - devel_mode + - not use_epel + notify: Restart collectd + +- name: Install cephmetrics-collectors + package: + name: cephmetrics-collectors + state: latest + when: + - ansible_pkg_mgr == "yum" + - not devel_mode + notify: Restart collectd + +- name: Install dependencies for collector plugins + package: + name: "{{ item }}" + state: latest + with_items: "{{ collector_dependencies[ansible_pkg_mgr] }}" + when: + - devel_mode + notify: Restart collectd diff --git a/ansible/roles/ceph-collectd/tasks/main.yml b/ansible/roles/ceph-collectd/tasks/main.yml new file mode 100644 index 0000000..8e9e804 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/main.yml @@ -0,0 +1,50 @@ +--- +- include: merge_vars.yml + tags: + - always + +- name: Skip if not configured + meta: end_play + when: backend.metrics != 'cephmetrics' + +- include: setup_repos.yml + when: + - not containerized_deployment + tags: + - packages + - repos + +- include: install_packages.yml + when: not containerized_deployment + tags: + - packages + +- include: set_collectd_vars.yml + tags: + - collectors + +- include: install_collectd_plugins.yml + when: + - not containerized_deployment + - devel_mode + tags: + - collectors + +- include: configure_collectd.yml + when: not containerized_deployment + tags: + - collectors + +- include: selinux.yml + when: + - not containerized_deployment + - ansible_pkg_mgr == "yum" + - ansible_selinux.status is defined + - ansible_selinux.status == 'enabled' + tags: + - selinux + +- include: start_collectd.yml + when: not containerized_deployment + tags: + - services diff --git 
a/ansible/roles/ceph-collectd/tasks/merge_vars.yml b/ansible/roles/ceph-collectd/tasks/merge_vars.yml new file mode 120000 index 0000000..5057c16 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/merge_vars.yml @@ -0,0 +1 @@ +../../cephmetrics-common/tasks/merge_vars.yml \ No newline at end of file diff --git a/ansible/roles/ceph-collectd/tasks/selinux.yml b/ansible/roles/ceph-collectd/tasks/selinux.yml new file mode 100644 index 0000000..80885e3 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/selinux.yml @@ -0,0 +1,16 @@ +--- +- name: Enable collectd_tcp_network_connect SELinux boolean + seboolean: + name: collectd_tcp_network_connect + state: yes + persistent: yes + +- name: Restore SELinux context of OSD journals + shell: "restorecon -R -v /var/lib/ceph/osd/*/journal" + when: "'osds' in group_names" + register: restorecon + changed_when: restorecon.stdout|length != 0 or restorecon.stderr|length != 0 + +- include: selinux_module.yml + when: + - devel_mode diff --git a/ansible/roles/ceph-collectd/tasks/selinux_module.yml b/ansible/roles/ceph-collectd/tasks/selinux_module.yml new file mode 100644 index 0000000..715250c --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/selinux_module.yml @@ -0,0 +1,28 @@ +--- +- name: Remove SELinux policy package + command: semodule -r cephmetrics + failed_when: false + +- name: Remove any SELinux-related files + file: + path: "{{ item }}" + state: absent + with_items: + - /tmp/cephmetrics.fc + - /tmp/cephmetrics.if + - /tmp/cephmetrics.pp + - /tmp/cephmetrics.te + +- name: Copy SELinux type enforcement file + copy: + src: cephmetrics.te + dest: /tmp/cephmetrics.te + +- name: Build SELinux policy package + command: make -f /usr/share/selinux/devel/Makefile cephmetrics.pp + args: + chdir: /tmp + +- name: Load SELinux policy package + command: semodule -i /tmp/cephmetrics.pp + notify: Restart collectd diff --git a/ansible/roles/ceph-collectd/tasks/set_collectd_vars.yml 
b/ansible/roles/ceph-collectd/tasks/set_collectd_vars.yml new file mode 100644 index 0000000..8090240 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/set_collectd_vars.yml @@ -0,0 +1,17 @@ +--- +- name: Set collectd_dir + set_fact: + collectd_dir: "/usr/lib{{ '64' if ansible_pkg_mgr == 'yum' else '' }}/collectd" + +- name: Set collectd_cephmetrics_dir + set_fact: + collectd_cephmetrics_dir: "{{ collectd_dir }}/cephmetrics" + +- name: Set collectd_conf + set_fact: + collectd_conf: "/etc/{{ 'collectd/' if ansible_pkg_mgr == 'apt' else '' }}collectd.conf" + +- name: Set collectd_conf_d + set_fact: + collectd_conf_d: "/etc/collectd{{ '/collectd.conf' if ansible_pkg_mgr == 'apt' else '' }}.d" + diff --git a/ansible/roles/ceph-collectd/tasks/setup_repos.yml b/ansible/roles/ceph-collectd/tasks/setup_repos.yml new file mode 100644 index 0000000..d674850 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/setup_repos.yml @@ -0,0 +1,24 @@ +--- +- name: Add collectd repo + template: + src: collectd.list + dest: /etc/apt/sources.list.d/collectd.list + when: + - ansible_pkg_mgr == 'apt' + - devel_mode + +- name: Add collectd.org GPG key to apt + apt_key: + id: 3994D24FB8543576 + state: present + keyserver: ha.pool.sks-keyservers.net + when: + - ansible_pkg_mgr == 'apt' + - devel_mode + +- name: Update apt cache + apt: + update_cache: true + when: + - ansible_pkg_mgr == 'apt' + - devel_mode diff --git a/ansible/roles/ceph-collectd/tasks/start_collectd.yml b/ansible/roles/ceph-collectd/tasks/start_collectd.yml new file mode 100644 index 0000000..ec4d7c4 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/start_collectd.yml @@ -0,0 +1,6 @@ +--- +- name: Start collectd + service: + name: collectd + enabled: yes + state: started diff --git a/ansible/roles/ceph-collectd/templates/collectd.list b/ansible/roles/ceph-collectd/templates/collectd.list new file mode 100644 index 0000000..5a12f86 --- /dev/null +++ b/ansible/roles/ceph-collectd/templates/collectd.list @@ -0,0 +1 @@ 
+deb http://pkg.ci.collectd.org/deb {{ ansible_distribution_release }} collectd-5.7 diff --git a/ansible/roles/ceph-collectd/tests/test_collectd.py b/ansible/roles/ceph-collectd/tests/test_collectd.py new file mode 100644 index 0000000..6532378 --- /dev/null +++ b/ansible/roles/ceph-collectd/tests/test_collectd.py @@ -0,0 +1,19 @@ +import pytest + +testinfra_hosts = ['!ceph-grafana'] + + +class TestCollectd(object): + def maybe_skip(self, host): + vars = host.ansible.get_variables() + if vars.get('backend', dict()).get('metrics', 'mgr') != 'cephmetrics': + pytest.skip() + + def test_service_enabled(self, host): + self.maybe_skip(host) + assert host.service('collectd').is_enabled + assert host.service('collectd').is_running + + def test_logfile_present(self, host): + self.maybe_skip(host) + assert host.file('/var/log/collectd-cephmetrics.log').is_file diff --git a/ansible/roles/ceph-docker/defaults/main.yml b/ansible/roles/ceph-docker/defaults/main.yml new file mode 100644 index 0000000..e31960c --- /dev/null +++ b/ansible/roles/ceph-docker/defaults/main.yml @@ -0,0 +1,12 @@ +--- +defaults: + docker: + packages: + yum: + - docker + - docker-python + apt: + - docker.io + - python-docker + network_name: cephmetrics + service_name: docker diff --git a/ansible/roles/ceph-docker/meta/main.yml b/ansible/roles/ceph-docker/meta/main.yml new file mode 100644 index 0000000..56dd016 --- /dev/null +++ b/ansible/roles/ceph-docker/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: cephmetrics-common diff --git a/ansible/roles/ceph-docker/tasks/install_packages.yml b/ansible/roles/ceph-docker/tasks/install_packages.yml new file mode 100644 index 0000000..1c423fa --- /dev/null +++ b/ansible/roles/ceph-docker/tasks/install_packages.yml @@ -0,0 +1,6 @@ +--- +- name: Install packages + package: + name: "{{ item }}" + state: latest + with_items: "{{ docker.packages[ansible_pkg_mgr] }}" diff --git a/ansible/roles/ceph-docker/tasks/main.yml 
b/ansible/roles/ceph-docker/tasks/main.yml new file mode 100644 index 0000000..0d9be3b --- /dev/null +++ b/ansible/roles/ceph-docker/tasks/main.yml @@ -0,0 +1,23 @@ +--- +- include: merge_vars.yml + tags: + - always + +- name: End play if no containers are desired + meta: end_play + when: + - not grafana.containerized + - not prometheus.containerized + +- include: install_packages.yml + tags: + - packages + +- include: start_services.yml + tags: + - services + +- name: Create a network + docker_network: + name: "{{ docker.network_name }}" + driver: bridge diff --git a/ansible/roles/ceph-docker/tasks/merge_vars.yml b/ansible/roles/ceph-docker/tasks/merge_vars.yml new file mode 120000 index 0000000..5057c16 --- /dev/null +++ b/ansible/roles/ceph-docker/tasks/merge_vars.yml @@ -0,0 +1 @@ +../../cephmetrics-common/tasks/merge_vars.yml \ No newline at end of file diff --git a/ansible/roles/ceph-docker/tasks/start_services.yml b/ansible/roles/ceph-docker/tasks/start_services.yml new file mode 100644 index 0000000..b5c5ce4 --- /dev/null +++ b/ansible/roles/ceph-docker/tasks/start_services.yml @@ -0,0 +1,12 @@ +--- +- name: Reload systemd + systemd: + daemon_reload: yes + tags: + - services + +- name: Enable and start services + service: + name: "{{ docker.service_name }}" + state: started + enabled: true diff --git a/ansible/roles/ceph-docker/tests/test_docker.py b/ansible/roles/ceph-docker/tests/test_docker.py new file mode 100644 index 0000000..2508858 --- /dev/null +++ b/ansible/roles/ceph-docker/tests/test_docker.py @@ -0,0 +1,19 @@ +import pytest + +testinfra_hosts = ['ceph-grafana'] + + +class TestDocker(object): + def maybe_skip(self, host): + services = ['grafana', 'prometheus'] + + def is_containerized(service): + vars = host.ansible.get_variables() + return vars.get(service, dict()).get('containerized') + if not any(map(is_containerized, services)): + pytest.skip() + + def test_docker_running(self, host): + self.maybe_skip(host) + assert 
host.service('docker').is_enabled + assert host.service('docker').is_running diff --git a/ansible/roles/ceph-grafana/defaults/main.yml b/ansible/roles/ceph-grafana/defaults/main.yml new file mode 100644 index 0000000..e8fc348 --- /dev/null +++ b/ansible/roles/ceph-grafana/defaults/main.yml @@ -0,0 +1,19 @@ +--- +defaults: + # graphite defaults are now in the cephmetrics-common role since the + # ceph-collectd role needs access to them + replace_dashboards: true + update_alerts: false + grafana: + containerized: false + container_name: "grafana/grafana" + # version currently only applies to containers + version: 4.6.3 + datasource: Local + # Note: changing this value won't update the password in Grafana itself; + # change it via the web UI and then override this value to reflect. + user: admin + admin_password: admin + plugins: + - vonage-status-panel + - grafana-piechart-panel diff --git a/ansible/roles/ceph-grafana/files/dashUpdater.py b/ansible/roles/ceph-grafana/files/dashUpdater.py new file mode 120000 index 0000000..a7871c4 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashUpdater.py @@ -0,0 +1 @@ +../../../../dashUpdater.py \ No newline at end of file diff --git a/ansible/roles/ceph-grafana/files/dashboards b/ansible/roles/ceph-grafana/files/dashboards new file mode 120000 index 0000000..dd1dc60 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards @@ -0,0 +1 @@ +../../../../dashboards/ \ No newline at end of file diff --git a/ansible/roles/ceph-grafana/files/grafana-server.service b/ansible/roles/ceph-grafana/files/grafana-server.service new file mode 100644 index 0000000..ef04af0 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/grafana-server.service @@ -0,0 +1,15 @@ +[Unit] +Description=grafana-server +After=docker.service + +[Service] +EnvironmentFile=-/etc/environment +ExecStart=/usr/bin/docker start --attach grafana-server +ExecStop=-/usr/bin/docker stop grafana-server +Restart=always +RestartSec=10s +TimeoutStartSec=120 
+TimeoutStopSec=15 + +[Install] +WantedBy=multi-user.target diff --git a/ansible/roles/ceph-grafana/files/grafana.ini b/ansible/roles/ceph-grafana/files/grafana.ini new file mode 120000 index 0000000..d27ea1e --- /dev/null +++ b/ansible/roles/ceph-grafana/files/grafana.ini @@ -0,0 +1 @@ +../../../../etc/grafana/grafana.ini \ No newline at end of file diff --git a/ansible/roles/ceph-grafana/files/grafana.list b/ansible/roles/ceph-grafana/files/grafana.list new file mode 100644 index 0000000..886da8d --- /dev/null +++ b/ansible/roles/ceph-grafana/files/grafana.list @@ -0,0 +1 @@ +deb https://packagecloud.io/grafana/stable/debian/ jessie main diff --git a/ansible/roles/ceph-grafana/handlers/main.yml b/ansible/roles/ceph-grafana/handlers/main.yml new file mode 100644 index 0000000..cb5200f --- /dev/null +++ b/ansible/roles/ceph-grafana/handlers/main.yml @@ -0,0 +1,8 @@ +--- +- name: Enable service + # We use the systemd module here so we can use the daemon_reload feature, + # since we're shipping the .service file ourselves + systemd: + name: grafana-server + daemon_reload: true + enabled: true diff --git a/ansible/roles/ceph-grafana/meta/main.yml b/ansible/roles/ceph-grafana/meta/main.yml new file mode 100644 index 0000000..56dd016 --- /dev/null +++ b/ansible/roles/ceph-grafana/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: cephmetrics-common diff --git a/ansible/roles/ceph-grafana/tasks/configure_firewall.yml b/ansible/roles/ceph-grafana/tasks/configure_firewall.yml new file mode 100644 index 0000000..e2278eb --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/configure_firewall.yml @@ -0,0 +1,19 @@ +--- +- name: Check firewalld status + shell: "systemctl show firewalld | grep UnitFileState" + register: firewalld_status + failed_when: false + changed_when: false + tags: + - skip_ansible_lint + +- name: Open ports for Grafana + firewalld: + port: "{{ item }}" + zone: "{{ firewalld_zone }}" + state: enabled + immediate: true + permanent: true + 
with_items: + - 3000/tcp + when: "'enabled' in firewalld_status.stdout" diff --git a/ansible/roles/ceph-grafana/tasks/configure_grafana.yml b/ansible/roles/ceph-grafana/tasks/configure_grafana.yml new file mode 100644 index 0000000..50c7bde --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/configure_grafana.yml @@ -0,0 +1,143 @@ +--- +- name: Make sure grafana is down + service: + name: grafana-server + state: stopped + +- name: Wait for grafana to be stopped + wait_for: + port: 3000 + state: stopped + +- name: Write grafana.ini + copy: + src: files/grafana.ini + dest: /etc/grafana/grafana.ini + owner: root + group: grafana + mode: 0640 + tags: [ini] + +- name: Set owner on grafana.ini + file: + path: /etc/grafana + state: directory + # This is the UID used by the grafana/grafana container + owner: 104 + recurse: true + when: + - grafana.containerized + +- name: Set domain in grafana.ini + lineinfile: + dest: /etc/grafana/grafana.ini + regexp: "^domain = .*" + insertafter: "^;domain = .*" + line: "domain = {{ ansible_fqdn }}" + tags: [ini] + +- name: Set admin_password in grafana.ini + lineinfile: + dest: /etc/grafana/grafana.ini + regexp: "^admin_password = .*" + insertafter: "^;admin_password = .*" + line: "admin_password = {{ grafana.admin_password }}" + no_log: true + tags: [ini] + +- include: grafana_plugins.yml + when: + - devel_mode + - not grafana.containerized + +- name: Enable and start grafana + service: + name: grafana-server + state: restarted + enabled: true + +- name: Wait for grafana to start + wait_for: + port: 3000 + +- name: Set grafana_data_source + set_fact: + grafana_data_source: > + { + "name":"{{ grafana.datasource }}", + "type":"graphite", + "url":"http://localhost:{{ graphite.web_port if graphite.service == 'graphite-web' else graphite.api_port }}", + "access":"proxy", + "basicAuth":false, + "isDefault":true + } + when: backend.storage == 'graphite' + +- name: Set grafana_data_source + set_fact: + grafana_data_source: > + { + "name":"{{ 
grafana.datasource }}", + "type":"prometheus", + "url":"http://cephmetrics-prometheus:9090", + "access":"proxy", + "basicAuth":false, + "isDefault":true + } + when: backend.storage == 'prometheus' + +- name: Add data source + uri: + url: http://localhost:3000/api/datasources + method: POST + user: admin + password: "{{ grafana.admin_password }}" + force_basic_auth: yes + body_format: json + body: "{{ grafana_data_source }}" + # If we get a 409 Conflict, it means we're already set up. We'll update + # after this. + status_code: 200,409 + register: grafana_data_source_result + # If grafana is containerized, it may take a few more seconds to start after + # the port is open. + until: grafana_data_source_result.status != -1 + retries: 6 + delay: 5 + +- name: Get datasource ID + uri: + url: "http://localhost:3000/api/datasources/id/{{ grafana.datasource }}" + method: GET + user: "{{ grafana.user }}" + password: "{{ grafana.admin_password }}" + force_basic_auth: yes + register: grafana_data_source_id + when: grafana_data_source_result is defined and grafana_data_source_result.status == 409 + +- name: Update datasource + uri: + url: "http://localhost:3000/api/datasources/{{ grafana_data_source_id.json.id }}" + method: PUT + user: "{{ grafana.user }}" + password: "{{ grafana.admin_password }}" + force_basic_auth: yes + body_format: json + body: "{{ grafana_data_source }}" + status_code: 200 + when: grafana_data_source_result is defined and grafana_data_source_result.status == 409 + +- include: push_dashboards_dashupdater.yml + when: + - not grafana.containerized + - backend.metrics == "cephmetrics" + - backend.storage == "graphite" + tags: + - dashboards + +- include: push_dashboards.yml + when: + - backend.metrics == "mgr" + - backend.storage == "prometheus" + tags: + - dashboards diff --git a/ansible/roles/ceph-grafana/tasks/grafana_plugins.yml b/ansible/roles/ceph-grafana/tasks/grafana_plugins.yml new file mode 100644 index 0000000..bb5838e --- /dev/null +++ 
b/ansible/roles/ceph-grafana/tasks/grafana_plugins.yml @@ -0,0 +1,26 @@ +--- +- name: Create Grafana plugins directory + file: + name: /var/lib/grafana/plugins + state: directory + +- name: Install Grafana plugins + command: "grafana-cli plugins install {{ item }}" + with_items: "{{ grafana.plugins }}" + +- name: Update Grafana plugins + command: "grafana-cli plugins update {{ item }}" + with_items: "{{ grafana.plugins }}" + +- name: Update status-panel for readability within the 'light' theme (older versions) + command: "sed -i.bak -e 's/green/rgb(1,167,1)/g' /var/lib/grafana/plugins/vonage-status-panel/dist/css/status_panel.css" + tags: + - skip_ansible_lint + +- name: Update status-panel for readability within the 'light' theme (newer versions) + replace: + path: "/var/lib/grafana/plugins/vonage-status-panel/dist/status_ctrl.js" + regexp: "ok: 'rgba\\(50, 128, 45, 0\\.9\\)'," + replace: "ok: 'rgb(1,167,1)'," + backup: no + failed_when: false diff --git a/ansible/roles/ceph-grafana/tasks/install_packages.yml b/ansible/roles/ceph-grafana/tasks/install_packages.yml new file mode 100644 index 0000000..9dfdbeb --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/install_packages.yml @@ -0,0 +1,13 @@ +--- +- name: Install packages + package: + name: "{{ item }}" + state: latest + with_items: "{{ devel_packages[ansible_pkg_mgr] }}" + when: devel_mode + +- name: Install cephmetrics + package: + name: cephmetrics + state: latest + when: not devel_mode diff --git a/ansible/roles/ceph-grafana/tasks/main.yml b/ansible/roles/ceph-grafana/tasks/main.yml new file mode 100644 index 0000000..cbb3e7c --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/main.yml @@ -0,0 +1,36 @@ +--- +- include: merge_vars.yml + tags: + - always + +- include: sanity.yml + tags: + - sanity + +- include: setup_container.yml + when: + - grafana.containerized + +- include: setup_repos.yml + when: + - not grafana.containerized + tags: + - packages + - repos + +- include: install_packages.yml + when: 
not grafana.containerized + tags: + - packages + +- include: configure_firewall.yml + tags: + - firewall + +- include: configure_grafana.yml + tags: + - grafana + +- include: start_services.yml + tags: + - services diff --git a/ansible/roles/ceph-grafana/tasks/merge_vars.yml b/ansible/roles/ceph-grafana/tasks/merge_vars.yml new file mode 120000 index 0000000..5057c16 --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/merge_vars.yml @@ -0,0 +1 @@ +../../cephmetrics-common/tasks/merge_vars.yml \ No newline at end of file diff --git a/ansible/roles/ceph-grafana/tasks/push_dashboards.yml b/ansible/roles/ceph-grafana/tasks/push_dashboards.yml new file mode 100644 index 0000000..545032c --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/push_dashboards.yml @@ -0,0 +1,67 @@ +--- +# First, look at our local filesystem for the dashboards we need to push. +- name: Stat dashboard files + local_action: + module: stat + path: "{{ item }}" + follow: true + get_attributes: false + get_checksum: false + get_md5: false + get_mime: false + connection: local + become: false + with_fileglob: "dashboards/{{ backend.metrics }}-{{ backend.storage }}/*.json" + register: dashboard_files + no_log: true + +# Before we do any of the shenanigans further down this file, we need to create +# an object to populate with dashboard information +- name: Create dashboards var + set_fact: + dashboards: {} + +# The below populates the dashboards dict to look like: +# dashboards: +# ceph-at-a-glance: +# body: +# ... and so on +- name: Set dashboards var keys + set_fact: + dashboards: "{% for item in dashboard_files.results %}{{ dashboards.update({item.stat.path.split('/')[-1].split('.')[:-1]|join('.'): {'body': lookup('file', item.stat.path)|from_json }}) }}{% endfor %}{{ dashboards }}" + no_log: true + +# If the dashboards have been previously deployed, we need the ID so that we +# can update instead of adding another copy. So first we grab that. 
+- name: Get dashboard IDs + uri: + url: "http://localhost:3000/api/dashboards/db/{{ item }}" + method: GET + user: "{{ grafana.user }}" + password: "{{ grafana.admin_password }}" + force_basic_auth: yes + status_code: 200,404 + with_items: "{{ dashboards.keys() }}" + register: dashboard_ids + no_log: true + +# Next, take any IDs we found and set them in each dashboard body. +# Any dashboards which don't exist on the server will be skipped. +- name: Add IDs to dashboards var + set_fact: + dashboards: "{% if item.status == 200 %}{{ dashboards[item.json.meta.slug].body.update({'id': item.json.dashboard.id}) }}{% endif %}{{ dashboards }}" + with_items: "{{ dashboard_ids.results }}" + no_log: true + +# Now we can actually push the dashboards! +- name: Push updated dashboards + uri: + url: "http://localhost:3000/api/dashboards/db" + method: POST + user: "{{ grafana.user }}" + password: "{{ grafana.admin_password }}" + force_basic_auth: yes + status_code: 200 + body_format: json + body: '{"dashboard": {{ dashboards[item].body|tojson }}, "overwrite": true}' + with_items: "{{ dashboards.keys() }}" diff --git a/ansible/roles/ceph-grafana/tasks/push_dashboards_dashupdater.yml b/ansible/roles/ceph-grafana/tasks/push_dashboards_dashupdater.yml new file mode 100644 index 0000000..d95fff8 --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/push_dashboards_dashupdater.yml @@ -0,0 +1,41 @@ +--- +- name: Ship dashboard templates + copy: + src: files/dashboards/cephmetrics-graphite + dest: /tmp/dashboards + when: + - devel_mode + +- name: Ship dashUpdater.py + copy: + src: files/dashUpdater.py + dest: /tmp/dashUpdater.py + when: + - devel_mode + +- name: Write dashboard.yml + template: + src: dashboard.yml + dest: /tmp/dashboard.yml + mode: 0600 + +- name: Set dashupdate_cmd and dashboard_dir + set_fact: + dashupdate_cmd: "python /tmp/dashUpdater.py" + dashboard_dir: "/tmp/dashboards" + when: + - devel_mode + +- name: Set dashupdate_cmd and dashboard_dir + set_fact: + 
dashupdate_cmd: "/usr/libexec/cephmetrics/dashUpdater.py" + dashboard_dir: "/usr/share/cephmetrics/dashboards/cephmetrics-graphite" + when: + - not devel_mode + +- name: Set dashupdate_mode + set_fact: + dashupdate_mode: "{{ 'refresh' if replace_dashboards else 'update' }}" + +- name: Push dashboards to Grafana + command: "{{ dashupdate_cmd }} -m {{ dashupdate_mode }} -c /tmp/dashboard.yml -D {{ dashboard_dir }}{{ ' -A' if update_alerts else '' }}" diff --git a/ansible/roles/ceph-grafana/tasks/sanity.yml b/ansible/roles/ceph-grafana/tasks/sanity.yml new file mode 100644 index 0000000..ad901a8 --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/sanity.yml @@ -0,0 +1,6 @@ +--- +- name: Fail when hosts are specified by IP address + fail: + msg: "All hosts must be referred to by an FQDN!" + when: item|ipaddr + with_items: "{{ groups.all }}" diff --git a/ansible/roles/ceph-grafana/tasks/setup_container.yml b/ansible/roles/ceph-grafana/tasks/setup_container.yml new file mode 100644 index 0000000..9ce4ace --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/setup_container.yml @@ -0,0 +1,53 @@ +--- +- name: Include ceph-docker + include_role: + name: ceph-docker + allow_duplicates: false + when: grafana.containerized + tags: docker + +- name: Create grafana user + user: + name: grafana + shell: '/bin/false' + createhome: false + system: true + +- name: Create /etc/grafana + file: + path: /etc/grafana + state: directory + owner: root + +- name: Create /var/lib/grafana + file: + path: /var/lib/grafana + state: directory + owner: root + +- name: Create docker container + docker_container: + name: grafana-server + image: "{{ grafana.container_name }}:{{ grafana.version }}" + state: present + restart_policy: no + published_ports: '3000:3000' + detach: true + volumes: + - "/etc/grafana:/etc/grafana:Z" + - "/var/lib/grafana:/var/lib/grafana:Z" + networks: + - name: "{{ docker.network_name }}" + keep_volumes: true + pull: true + env: + GF_INSTALL_PLUGINS: "{{ 
grafana.plugins|join(',') }}" + +- name: Ship systemd service + copy: + src: grafana-server.service + dest: "/etc/systemd/system/" + owner: root + group: root + mode: 0644 + notify: Enable service diff --git a/ansible/roles/ceph-grafana/tasks/setup_repos.yml b/ansible/roles/ceph-grafana/tasks/setup_repos.yml new file mode 100644 index 0000000..11fb453 --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/setup_repos.yml @@ -0,0 +1,43 @@ +--- +- name: Add Grafana repo + template: + src: grafana.repo + dest: /etc/yum.repos.d/grafana.repo + when: + - ansible_pkg_mgr == 'yum' + - use_epel + - devel_mode + +- name: Remove Grafana repo + file: + path: /etc/yum.repos.d/grafana.repo + state: absent + when: + - ansible_pkg_mgr == "yum" + - not use_epel + - devel_mode + +- name: Install grafana repo + copy: + src: files/grafana.list + dest: /etc/apt/sources.list.d/grafana.list + when: + - ansible_pkg_mgr == "apt" + - devel_mode + +- name: Add packagecloud GPG key to apt + apt_key: + # This is the key used by the grafana repo + url: https://packagecloud.io/gpg.key + id: D59097AB + state: present + when: + - ansible_pkg_mgr == "apt" + - devel_mode + +- name: Update apt cache + apt: + update_cache: true + when: + - ansible_pkg_mgr == 'apt' + - devel_mode diff --git a/ansible/roles/ceph-grafana/tasks/start_services.yml b/ansible/roles/ceph-grafana/tasks/start_services.yml new file mode 100644 index 0000000..7bd485c --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/start_services.yml @@ -0,0 +1,15 @@ +--- +- name: Reload systemd + systemd: + daemon_reload: yes + # Even when just calling daemon-reload, ansible < 2.4 requires a name for this call + name: grafana-server + tags: + - packages + - services + +- name: Enable and start services + service: + name: grafana-server + state: started + enabled: true diff --git a/ansible/roles/ceph-grafana/templates/dashboard.yml b/ansible/roles/ceph-grafana/templates/dashboard.yml new file mode 100644 index 0000000..d4d3263 --- /dev/null +++ 
b/ansible/roles/ceph-grafana/templates/dashboard.yml @@ -0,0 +1,25 @@ +--- +osd_servers: [{% for fqdn in groups['osds'] | default([]) %}{{ fqdn.split('.')[0] }},{% endfor %}] +rgw_servers: [{% for fqdn in groups['rgws'] | default([]) %}{{ fqdn.split('.')[0] }},{% endfor %}] +domain: "{{ ansible_domain }}" +_dashboards: + - alert-status + - ceph-at-a-glance + - ceph-backend-storage + - ceph-cluster + - ceph-health + - ceph-osd-information + - ceph-pools + - ceph-rgw-workload + - disk-busy-by-server + - iops-by-server + - iscsi-overview + - latency-by-server + - network-usage-by-node + - osd-node-detail +_credentials: + user: admin + password: {{ grafana.admin_password }} +_grafana_port: 3000 +_home_dashboard: ceph-at-a-glance +_alert_dashboard: alert-status diff --git a/ansible/roles/ceph-grafana/templates/grafana.repo b/ansible/roles/ceph-grafana/templates/grafana.repo new file mode 100644 index 0000000..1ba7fb6 --- /dev/null +++ b/ansible/roles/ceph-grafana/templates/grafana.repo @@ -0,0 +1,9 @@ +[grafana] +name=grafana +baseurl=https://packagecloud.io/grafana/stable/el/{{ ansible_distribution_major_version }}/$basearch +repo_gpgcheck=1 +enabled=1 +gpgcheck=1 +gpgkey=https://packagecloud.io/gpg.key https://grafanarel.s3.amazonaws.com/RPM-GPG-KEY-grafana +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt diff --git a/ansible/roles/ceph-grafana/tests/test_grafana.py b/ansible/roles/ceph-grafana/tests/test_grafana.py new file mode 100644 index 0000000..e9496e2 --- /dev/null +++ b/ansible/roles/ceph-grafana/tests/test_grafana.py @@ -0,0 +1,32 @@ +import pytest + +testinfra_hosts = ['ceph-grafana'] + + +class TestGrafana(object): + @pytest.mark.parametrize( + "service", + ['grafana-server'] + ) + def test_service_enabled(self, host, service): + """ Are the proper services enabled? 
""" + if isinstance(service, dict): + service = service[ + host.ansible('setup')['ansible_facts']['ansible_pkg_mgr']] + service = host.service(service) + assert service.is_running + assert service.is_enabled + + @pytest.mark.parametrize( + "proto,iface,port", + [ + ('tcp', '0.0.0.0', '3000'), # grafana + ] + ) + def test_ports_open(self, host, proto, iface, port): + """ Are the proper ports open? """ + socket_spec = "%s://%s" % (proto, iface) + if iface: + socket_spec += ':' + socket_spec += port + assert host.socket(socket_spec).is_listening diff --git a/ansible/roles/ceph-graphite/defaults/main.yml b/ansible/roles/ceph-graphite/defaults/main.yml new file mode 100644 index 0000000..d1511b4 --- /dev/null +++ b/ansible/roles/ceph-graphite/defaults/main.yml @@ -0,0 +1,11 @@ +--- +defaults: + devel_packages: + yum: + - graphite-web + - python-carbon + apt: + - graphite-web + - apache2 + - libapache2-mod-wsgi + - graphite-carbon diff --git a/ansible/roles/ceph-graphite/handlers/main.yml b/ansible/roles/ceph-graphite/handlers/main.yml new file mode 100644 index 0000000..16fc5c6 --- /dev/null +++ b/ansible/roles/ceph-graphite/handlers/main.yml @@ -0,0 +1,30 @@ +--- +- name: Resize whisper databases + # xargs -P parallelizes execution; scale it to the number of cores on the system + shell: "find /var/lib/carbon/whisper -name '*.wsp' -print0 | xargs -n1 -0 -P {{ ansible_processor_vcpus }} -I {} whisper-resize {} {{ whisper_retention.replace(',', ' ') }}" + register: whisper_resize + failed_when: "'Traceback' in whisper_resize.stdout" + no_log: true + +- name: Restart graphite-web + service: + # graphite-web is served by apache + name: "{{ graphite.apache_name[ansible_pkg_mgr] }}" + state: restarted + enabled: true + when: + - "{{ graphite.service == 'graphite-web' }}" + +- name: Restart graphite-api + service: + name: graphite-api + state: restarted + enabled: true + when: + - "{{ graphite.service == 'graphite-api' }}" + +- name: Restart carbon-cache + service: + name: 
carbon-cache + state: restarted + enabled: true diff --git a/ansible/roles/ceph-graphite/meta/main.yml b/ansible/roles/ceph-graphite/meta/main.yml new file mode 100644 index 0000000..56dd016 --- /dev/null +++ b/ansible/roles/ceph-graphite/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: cephmetrics-common diff --git a/ansible/roles/ceph-graphite/tasks/configure_carbon.yml b/ansible/roles/ceph-graphite/tasks/configure_carbon.yml new file mode 100644 index 0000000..bc37ccb --- /dev/null +++ b/ansible/roles/ceph-graphite/tasks/configure_carbon.yml @@ -0,0 +1,37 @@ +--- +- name: Set WHISPER_AUTOFLUSH to True + lineinfile: + dest: /etc/carbon/carbon.conf + regexp: "^WHISPER_AUTOFLUSH = .*" + insertafter: "^#.*buffering writes from the kernel.*" + line: "WHISPER_AUTOFLUSH = True" + notify: + - Restart carbon-cache + +- name: Fail when the the first whisper retention value is not 10s + fail: + msg: "The first whisper retention value must be '10s', not '{{ whisper.retention[0][0] }}' in order to match collectd's interval" + failed_when: whisper.retention[0][0] != '10s' + any_errors_fatal: true + +- name: Set whisper_retention + set_fact: + whisper_retention: "{{ whisper.retention|map('join', ':')|join(',') }}" + +- name: Configure retention for collectd stats + template: + src: storage-schemas.conf + dest: /etc/carbon/storage-schemas.conf + notify: + - Resize whisper databases + - Restart carbon-cache + +- name: Ensure carbon storage has the right ownership + file: + path: "{{ carbon.storage_dir[ansible_pkg_mgr] }}" + state: directory + owner: "{{ carbon.unix_user[ansible_pkg_mgr] }}" + group: "{{ carbon.unix_user[ansible_pkg_mgr] }}" + recurse: yes + notify: + - Restart carbon-cache diff --git a/ansible/roles/ceph-graphite/tasks/configure_firewall.yml b/ansible/roles/ceph-graphite/tasks/configure_firewall.yml new file mode 100644 index 0000000..ef4b5b4 --- /dev/null +++ b/ansible/roles/ceph-graphite/tasks/configure_firewall.yml @@ -0,0 +1,19 @@ +--- +- name: 
Check firewalld status + shell: "systemctl show firewalld | grep UnitFileState" + register: firewalld_status + failed_when: false + changed_when: false + tags: + - skip_ansible_lint + +- name: Open ports for Carbon + firewalld: + port: "{{ item }}" + zone: "{{ firewalld_zone }}" + state: enabled + immediate: true + permanent: true + with_items: + - 2003/tcp + when: "'enabled' in firewalld_status.stdout" diff --git a/ansible/roles/ceph-graphite/tasks/configure_graphite.yml b/ansible/roles/ceph-graphite/tasks/configure_graphite.yml new file mode 100644 index 0000000..e911108 --- /dev/null +++ b/ansible/roles/ceph-graphite/tasks/configure_graphite.yml @@ -0,0 +1,8 @@ +--- +- include: configure_graphite_web.yml + when: + - graphite.service == 'graphite-web' + +- include: configure_graphite_api.yml + when: + - graphite.service == 'graphite-api' diff --git a/ansible/roles/ceph-graphite/tasks/configure_graphite_api.yml b/ansible/roles/ceph-graphite/tasks/configure_graphite_api.yml new file mode 100644 index 0000000..2b85f2e --- /dev/null +++ b/ansible/roles/ceph-graphite/tasks/configure_graphite_api.yml @@ -0,0 +1,9 @@ +--- +- name: Write graphite-api systemd files + template: + src: "{{ item }}" + dest: "/etc/systemd/system/{{ item }}" + with_items: + - graphite-api.socket + - graphite-api.service + notify: Restart graphite-api diff --git a/ansible/roles/ceph-graphite/tasks/configure_graphite_web.yml b/ansible/roles/ceph-graphite/tasks/configure_graphite_web.yml new file mode 100644 index 0000000..1679d5a --- /dev/null +++ b/ansible/roles/ceph-graphite/tasks/configure_graphite_web.yml @@ -0,0 +1,94 @@ +--- +# Older versions of Django use the 'syncdb' method for DB creation/migration. +# Newer versions use 'migrate' but still have 'syncdb' available and +# deprecated. We should be able to auto-discover the correct method. 
+- name: Determine how to initialize Django DB + command: django-admin --help + register: django_cmd + no_log: true + +- name: Set django_init_method + set_fact: + django_init_method: "{% if 'migrate' in django_cmd.stdout_lines|map('trim')|list %}migrate{% else %}syncdb{% endif %}" + +- name: Create Graphite DB tables + command: "/usr/bin/graphite-manage {{ django_init_method }} --noinput" + become_user: "{{ graphite.unix_user[ansible_pkg_mgr] }}" + register: migrate_cmd + failed_when: migrate_cmd.rc != 0 and 'table "django_content_type" already exists' not in migrate_cmd.stderr + +- name: Run Graphite migrations if necessary + command: /usr/bin/graphite-manage migrate --noinput --fake-initial + become_user: "{{ graphite.unix_user[ansible_pkg_mgr] }}" + when: + - django_init_method == 'migrate' + - migrate_cmd.rc != 0 + +- name: Build Graphite index + command: "{{ graphite.build_index[ansible_pkg_mgr] }}" + become_user: "{{ graphite.unix_user[ansible_pkg_mgr] }}" + +- name: Set or unset port in main apache config + lineinfile: + dest: "/{{ graphite.apache_conf[ansible_pkg_mgr] }}" + regexp: "^Listen .*" + line: "Listen 80" + insertafter: "^#Listen .*" + state: "{{ 'present' if graphite.web_port == '80' else 'absent' }}" + notify: Restart graphite-web + +# This is done automatically during package install on RPM-based distros +- name: Copy graphite-web config on Ubuntu + copy: + src: /usr/share/graphite-web/apache2-graphite.conf + dest: "{{ graphite.site_conf[ansible_pkg_mgr] }}" + remote_src: yes + backup: yes + when: ansible_pkg_mgr == "apt" + notify: Restart graphite-web + +- name: Set port in graphite apache config + lineinfile: + dest: "{{ graphite.site_conf[ansible_pkg_mgr] }}" + regexp: "^Listen .*" + line: "Listen {{ graphite.web_port }}" + insertbefore: "^$" + replace: "" + notify: Restart graphite-web + +- name: Allow connecting to graphite without auth + lineinfile: + dest: "{{ graphite.site_conf[ansible_pkg_mgr] }}" + line: " Require all granted" + 
insertafter: '' + state: present + when: ansible_pkg_mgr == "yum" + notify: Restart graphite-web + +- name: Get timezone + shell: "timedatectl | grep 'Time zone'" + register: timezone + +- name: Set timezone in local_settings.py + lineinfile: + dest: "{{ graphite.app_conf[ansible_pkg_mgr] }}" + line: "TIME_ZONE = '{{ timezone.stdout.strip().split(' ')[2] }}'" + regexp: "^TIME_ZONE = .*" + insertafter: '^#TIME_ZONE = .*' + state: present + notify: Restart graphite-web diff --git a/ansible/roles/ceph-graphite/tasks/install_packages.yml b/ansible/roles/ceph-graphite/tasks/install_packages.yml new file mode 100644 index 0000000..0b9e1db --- /dev/null +++ b/ansible/roles/ceph-graphite/tasks/install_packages.yml @@ -0,0 +1,7 @@ +--- +- name: Install packages + package: + name: "{{ item }}" + state: latest + with_items: "{{ devel_packages[ansible_pkg_mgr] }}" + when: devel_mode diff --git a/ansible/roles/ceph-graphite/tasks/main.yml b/ansible/roles/ceph-graphite/tasks/main.yml new file mode 100644 index 0000000..96748e6 --- /dev/null +++ b/ansible/roles/ceph-graphite/tasks/main.yml @@ -0,0 +1,41 @@ +--- +- include: merge_vars.yml + tags: + - always + +- name: Skip if not configured + meta: end_play + when: backend.storage != 'graphite' + +- include: install_packages.yml + when: not containerized_deployment + tags: + - packages + +- include: configure_firewall.yml + tags: + - firewall + +- include: configure_carbon.yml + when: not containerized_deployment + tags: + - carbon + +- include: configure_graphite.yml + when: not containerized_deployment + tags: + - graphite + +- name: Reload systemd + systemd: + daemon_reload: yes + # Even when just calling daemon-reload, ansible < 2.4 requires a name for this call + name: carbon-cache + tags: + - packages + - services + +- include: start_services.yml + when: not containerized_deployment + tags: + - services diff --git a/ansible/roles/ceph-graphite/tasks/merge_vars.yml b/ansible/roles/ceph-graphite/tasks/merge_vars.yml new file 
mode 120000 index 0000000..5057c16 --- /dev/null +++ b/ansible/roles/ceph-graphite/tasks/merge_vars.yml @@ -0,0 +1 @@ +../../cephmetrics-common/tasks/merge_vars.yml \ No newline at end of file diff --git a/ansible/roles/ceph-graphite/tasks/start_services.yml b/ansible/roles/ceph-graphite/tasks/start_services.yml new file mode 100644 index 0000000..69089b6 --- /dev/null +++ b/ansible/roles/ceph-graphite/tasks/start_services.yml @@ -0,0 +1,10 @@ +--- +- name: Enable and start services + service: + name: "{{ item }}" + state: started + enabled: true + with_items: + - carbon-cache + # graphite-web is served by apache + - "{{ graphite.apache_name[ansible_pkg_mgr] }}" diff --git a/ansible/roles/ceph-graphite/templates/graphite-api.service b/ansible/roles/ceph-graphite/templates/graphite-api.service new file mode 100644 index 0000000..446efc8 --- /dev/null +++ b/ansible/roles/ceph-graphite/templates/graphite-api.service @@ -0,0 +1,15 @@ +[Unit] +Description=Graphite-API service +Requires=graphite-api.socket + +[Service] +ExecStart=/usr/bin/gunicorn3 -w2 graphite_api.app:app +Restart=on-failure +User={{ graphite.unix_user[ansible_pkg_mgr] }} +Group={{ graphite.unix_user[ansible_pkg_mgr] }} +ExecReload=/bin/kill -s HUP $MAINPID +ExecStop=/bin/kill -s TERM $MAINPID +PrivateTmp=true + +[Install] +WantedBy=multi-user.target diff --git a/ansible/roles/ceph-graphite/templates/graphite-api.socket b/ansible/roles/ceph-graphite/templates/graphite-api.socket new file mode 100644 index 0000000..f173e7b --- /dev/null +++ b/ansible/roles/ceph-graphite/templates/graphite-api.socket @@ -0,0 +1,9 @@ +[Unit] +Description=graphite-api socket + +[Socket] +ListenStream=/run/graphite-api.sock +ListenStream=127.0.0.1:{{ graphite.api_port }} + +[Install] +WantedBy=sockets.target diff --git a/ansible/roles/ceph-graphite/templates/storage-schemas.conf b/ansible/roles/ceph-graphite/templates/storage-schemas.conf new file mode 100644 index 0000000..5237b32 --- /dev/null +++ 
b/ansible/roles/ceph-graphite/templates/storage-schemas.conf @@ -0,0 +1,19 @@ +# Schema definitions for Whisper files. Entries are scanned in order, +# and first match wins. This file is scanned for changes every 60 seconds. +# +# [name] +# pattern = regex +# retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ... +[collectd] +pattern = ^collectd\. +retentions = {{ whisper_retention }} + +# Carbon's internal metrics. This entry should match what is specified in +# CARBON_METRIC_PREFIX and CARBON_METRIC_INTERVAL settings +[carbon] +pattern = ^carbon\. +retentions = 60:90d + +[default_1min_for_1day] +pattern = .* +retentions = 60s:1d diff --git a/ansible/roles/ceph-graphite/tests/test_graphite.py b/ansible/roles/ceph-graphite/tests/test_graphite.py new file mode 100644 index 0000000..5121987 --- /dev/null +++ b/ansible/roles/ceph-graphite/tests/test_graphite.py @@ -0,0 +1,95 @@ +import json +import os +import pytest + +testinfra_hosts = ['ceph-grafana'] + + +class TestGraphite(object): + def maybe_skip(self, host): + vars = host.ansible.get_variables() + if vars.get('backend', dict()).get('storage', 'prometheus') != 'graphite': + pytest.skip() + + def get_ceph_hosts(self, host): + """ + Extract a list of FQDNs of Ceph hosts from the Ansible inventory + """ + groups = host.ansible.get_variables()['groups'] + ceph_groups = ('mdss', 'mgrs', 'mons', 'osds', 'rgws') + ceph_hosts = set() + for group in ceph_groups: + hosts = groups.get(group, list()) + map(ceph_hosts.add, hosts) + return list(ceph_hosts) + + @pytest.mark.parametrize( + "service", + ['carbon-cache', + dict(apt='apache2', yum='httpd')] + ) + def test_service_enabled(self, host, service): + """ Are the proper services enabled? 
""" + self.maybe_skip(host) + if isinstance(service, dict): + service = service[ + host.ansible('setup')['ansible_facts']['ansible_pkg_mgr']] + service = host.service(service) + assert service.is_running + assert service.is_enabled + + @pytest.mark.parametrize( + "proto,iface,port", + [ + ('tcp', '0.0.0.0', '2003'), # carbon + ('tcp', '0.0.0.0', '2004'), # carbon + ('tcp', '0.0.0.0', '8080'), # graphite + ] + ) + def test_ports_open(self, host, proto, iface, port): + """ Are the proper ports open? """ + self.maybe_skip(host) + socket_spec = "%s://%s" % (proto, iface) + if iface: + socket_spec += ':' + socket_spec += port + assert host.socket(socket_spec).is_listening + + def test_whisper_data(self, host): + """ Does whisper data exist for each Ceph host? """ + self.maybe_skip(host) + whisper_dirs = [ + '/var/lib/carbon/whisper', + '/var/lib/graphite/whisper', + ] + for whisper_dir in whisper_dirs: + if host.file(whisper_dir).exists: + break + for ceph_host in self.get_ceph_hosts(host): + whisper_subdir = os.path.join( + whisper_dir, 'collectd', ceph_host.replace('.', '/') + ) + assert host.file(whisper_subdir).is_directory + cpu_metrics = [ + 'idle.wsp', 'nice.wsp', 'steal.wsp', 'user.wsp', + 'interrupt.wsp', 'softirq.wsp', 'system.wsp', 'wait.wsp', + ] + assert any([ + host.file(os.path.join( + whisper_subdir, 'cpu', 'percent', metric + )).is_file for metric in cpu_metrics + ]) + + def test_metrics_present(self, host): + """ Does graphite know about each Ceph host? 
""" + self.maybe_skip(host) + ceph_hosts = self.get_ceph_hosts(host) + out = host.check_output( + "curl http://localhost:8080/metrics/find?query=collectd.*") + obj = json.loads(out) + + def extract_hostname(fragment): + return fragment['text'] + metric_hosts = map(extract_hostname, obj) + assert sorted(map(lambda s: s.split('.')[0], ceph_hosts)) == \ + sorted(metric_hosts) diff --git a/ansible/roles/ceph-mgr/defaults/main.yml b/ansible/roles/ceph-mgr/defaults/main.yml new file mode 100644 index 0000000..dfdd45e --- /dev/null +++ b/ansible/roles/ceph-mgr/defaults/main.yml @@ -0,0 +1,2 @@ +--- +defaults: {} diff --git a/ansible/roles/ceph-mgr/meta/main.yml b/ansible/roles/ceph-mgr/meta/main.yml new file mode 100644 index 0000000..56dd016 --- /dev/null +++ b/ansible/roles/ceph-mgr/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: cephmetrics-common diff --git a/ansible/roles/ceph-mgr/tasks/main.yml b/ansible/roles/ceph-mgr/tasks/main.yml new file mode 100644 index 0000000..7f7ea4e --- /dev/null +++ b/ansible/roles/ceph-mgr/tasks/main.yml @@ -0,0 +1,21 @@ +--- +- include: merge_vars.yml + tags: + - always + +- name: End play if the mgr prometheus module isn't desired + meta: end_play + when: backend.metrics != 'mgr' or backend.storage != 'prometheus' + +- name: Check to see if the mgr is containerized + command: "docker inspect ceph-mgr@{{ ansible_hostname }}" + register: mgr_container + failed_when: false + +- name: Prefix the mgr command with a docker command + set_fact: + mgr_prefix: "docker exec ceph-mgr@{{ ansible_hostname }}" + when: mgr_container.rc == 0 + +- name: Enable mgr prometheus module + command: "{{ mgr_prefix|default('') }} ceph mgr module enable prometheus" diff --git a/ansible/roles/ceph-mgr/tasks/merge_vars.yml b/ansible/roles/ceph-mgr/tasks/merge_vars.yml new file mode 120000 index 0000000..5057c16 --- /dev/null +++ b/ansible/roles/ceph-mgr/tasks/merge_vars.yml @@ -0,0 +1 @@ +../../cephmetrics-common/tasks/merge_vars.yml \ No newline at 
end of file diff --git a/ansible/roles/ceph-mgr/tests/test_mgr.py b/ansible/roles/ceph-mgr/tests/test_mgr.py new file mode 100644 index 0000000..d028c46 --- /dev/null +++ b/ansible/roles/ceph-mgr/tests/test_mgr.py @@ -0,0 +1,18 @@ +import json +import pytest + +testinfra_hosts = ['mgrs'] + + +class TestMgr(object): + def maybe_skip(self, host): + vars = host.ansible.get_variables() + if vars.get('backend', dict()).get('metrics', 'mgr') != 'mgr': + pytest.skip() + + def test_prometheus_module(self, host): + self.maybe_skip(host) + out = host.check_output("sudo ceph mgr module ls") + obj = json.loads(out) + assert 'prometheus' in obj['enabled_modules'] + diff --git a/ansible/roles/ceph-node-exporter/defaults/main.yml b/ansible/roles/ceph-node-exporter/defaults/main.yml new file mode 100644 index 0000000..79fd1dd --- /dev/null +++ b/ansible/roles/ceph-node-exporter/defaults/main.yml @@ -0,0 +1,11 @@ +--- +defaults: + node_exporter: + arch_map: + x86_64: amd64 + i386: '386' + packages: + apt: + - prometheus-node-exporter + yum: + - golang-github-prometheus-node_exporter diff --git a/ansible/roles/ceph-node-exporter/handlers/main.yml b/ansible/roles/ceph-node-exporter/handlers/main.yml new file mode 100644 index 0000000..bab4490 --- /dev/null +++ b/ansible/roles/ceph-node-exporter/handlers/main.yml @@ -0,0 +1,9 @@ +--- +- name: Restart service + # We use the systemd module here so we can use the daemon_reload feature, + # since we're shipping the .service file ourselves + systemd: + name: node_exporter + daemon_reload: true + enabled: true + state: restarted diff --git a/ansible/roles/ceph-node-exporter/meta/main.yml b/ansible/roles/ceph-node-exporter/meta/main.yml new file mode 100644 index 0000000..56dd016 --- /dev/null +++ b/ansible/roles/ceph-node-exporter/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: cephmetrics-common diff --git a/ansible/roles/ceph-node-exporter/tasks/create_service.yml b/ansible/roles/ceph-node-exporter/tasks/create_service.yml new 
file mode 100644 index 0000000..67ef645 --- /dev/null +++ b/ansible/roles/ceph-node-exporter/tasks/create_service.yml @@ -0,0 +1,31 @@ +--- +- name: Look for /etc/sysconfig and /etc/default + stat: + path: "/etc/{{ item }}/" + with_items: + - sysconfig + - default + register: stat + +- name: Decide whether to use /etc/sysconfig or /etc/default + set_fact: + sysconfig_dir: "{{ item.item }}" + with_items: "{{ stat.results }}" + when: item.stat.exists + +- name: Ship defaults file + template: + src: sysconfig + dest: "/etc/{{ sysconfig_dir }}/node_exporter" + owner: root + group: root + mode: 0644 + +- name: Ship systemd service file + template: + src: node_exporter.service + dest: '/etc/systemd/system/' + owner: root + group: root + mode: 0644 + notify: "Restart service" diff --git a/ansible/roles/ceph-node-exporter/tasks/create_user.yml b/ansible/roles/ceph-node-exporter/tasks/create_user.yml new file mode 100644 index 0000000..a135bbf --- /dev/null +++ b/ansible/roles/ceph-node-exporter/tasks/create_user.yml @@ -0,0 +1,7 @@ +--- +- name: Create user + user: + name: node_exporter + shell: '/bin/false' + createhome: false + system: true diff --git a/ansible/roles/ceph-node-exporter/tasks/download.yml b/ansible/roles/ceph-node-exporter/tasks/download.yml new file mode 100644 index 0000000..655367a --- /dev/null +++ b/ansible/roles/ceph-node-exporter/tasks/download.yml @@ -0,0 +1,44 @@ +--- +- name: Ask GitHub about the most recent release + # We use local_action here to reduce the amount of times we hit the GitHub + # API; without it, a moderately-large cluster could actually cause them to + # invoke their API rate limiting! 
+ local_action: + module: uri + url: https://api.github.com/repos/prometheus/node_exporter/releases/latest + return_content: true + connection: local + become: false + run_once: true + register: latest_release + +- name: Decide which release to look for + set_fact: + os_string: "linux-{{ node_exporter.arch_map[ansible_architecture]|default(ansible_architecture) }}" + +- name: Find the correct release + set_fact: + release_url: "{{ item.browser_download_url }}" + when: os_string in item.name + with_items: "{{ latest_release.json.assets }}" + no_log: true + +- name: Download release + get_url: + url: "{{ release_url }}" + dest: '/tmp/' + register: download + +- name: Extract tarball + unarchive: + src: "{{ download.dest }}" + remote_src: true + dest: "/tmp/" + +- name: Copy binary into place + copy: + src: "{{ download.dest|replace('.tar.gz', '') }}/node_exporter" + remote_src: true + dest: "/usr/local/sbin/" + owner: node_exporter + mode: 0744 diff --git a/ansible/roles/ceph-node-exporter/tasks/install_packages.yml b/ansible/roles/ceph-node-exporter/tasks/install_packages.yml new file mode 100644 index 0000000..4513a51 --- /dev/null +++ b/ansible/roles/ceph-node-exporter/tasks/install_packages.yml @@ -0,0 +1,6 @@ +--- +- name: Install packages + package: + name: "{{ node_exporter.packages[ansible_pkg_mgr] }}" + state: latest + notify: "Restart service" diff --git a/ansible/roles/ceph-node-exporter/tasks/main.yml b/ansible/roles/ceph-node-exporter/tasks/main.yml new file mode 100644 index 0000000..0c0fbf1 --- /dev/null +++ b/ansible/roles/ceph-node-exporter/tasks/main.yml @@ -0,0 +1,30 @@ +--- +- include: merge_vars.yml + tags: + - always + +- name: End play if the prometheus backend isn't desired + meta: end_play + when: backend.storage != 'prometheus' + +- include: install_packages.yml + when: + - not devel_mode + +- include: create_user.yml + when: + - devel_mode + tags: + - node_exporter + +- include: download.yml + when: + - devel_mode + tags: + - node_exporter 
+ +- include: create_service.yml + when: + - devel_mode + tags: + - node_exporter diff --git a/ansible/roles/ceph-node-exporter/tasks/merge_vars.yml b/ansible/roles/ceph-node-exporter/tasks/merge_vars.yml new file mode 100644 index 0000000..f8dbcd0 --- /dev/null +++ b/ansible/roles/ceph-node-exporter/tasks/merge_vars.yml @@ -0,0 +1,5 @@ +--- +- name: Combine default settings and user-defined variables + set_fact: {"{{ item }}": "{% if vars[item] is not defined %}{{ defaults[item] }}{% elif vars[item] is mapping %}{{ defaults[item]|combine(vars[item]|default({})) }}{% else %}{{ vars[item] }}{% endif %}"} + with_items: "{{ defaults.keys() }}" + no_log: true diff --git a/ansible/roles/ceph-node-exporter/templates/node_exporter.service b/ansible/roles/ceph-node-exporter/templates/node_exporter.service new file mode 100644 index 0000000..87e8f58 --- /dev/null +++ b/ansible/roles/ceph-node-exporter/templates/node_exporter.service @@ -0,0 +1,10 @@ +[Unit] +Description=Node Exporter + +[Service] +User=node_exporter +EnvironmentFile=/etc/{{ sysconfig_dir }}/node_exporter +ExecStart=/usr/local/sbin/node_exporter $OPTIONS + +[Install] +WantedBy=multi-user.target diff --git a/ansible/roles/ceph-node-exporter/templates/sysconfig b/ansible/roles/ceph-node-exporter/templates/sysconfig new file mode 100644 index 0000000..0b7a839 --- /dev/null +++ b/ansible/roles/ceph-node-exporter/templates/sysconfig @@ -0,0 +1 @@ +{{ defaults.node_exporter.sysconfig|default('') }} diff --git a/ansible/roles/ceph-node-exporter/tests/test_node_exporter.py b/ansible/roles/ceph-node-exporter/tests/test_node_exporter.py new file mode 100644 index 0000000..45001aa --- /dev/null +++ b/ansible/roles/ceph-node-exporter/tests/test_node_exporter.py @@ -0,0 +1,21 @@ +import pytest + +testinfra_hosts = ['!ceph-grafana'] + + +class TestNodeExporter(object): + def maybe_skip(self, host): + vars = host.ansible.get_variables() + if vars.get('backend', dict()).get('storage', 'prometheus') != 'prometheus': + 
pytest.skip() + + def test_service_enabled(self, host): + self.maybe_skip(host) + assert host.service('node_exporter').is_enabled + assert host.service('node_exporter').is_running + + def test_port_open(self, host): + """ Is the node_exporter port open? """ + self.maybe_skip(host) + socket_spec = "tcp://0.0.0.0:9100" + assert host.socket(socket_spec).is_listening diff --git a/ansible/roles/ceph-prometheus/defaults/main.yml b/ansible/roles/ceph-prometheus/defaults/main.yml new file mode 100644 index 0000000..15dbd6e --- /dev/null +++ b/ansible/roles/ceph-prometheus/defaults/main.yml @@ -0,0 +1,8 @@ +--- +defaults: + prometheus: + containerized: false + container_name: prom/prometheus + version: latest + data_dir: /var/lib/cephmetrics + user_id: '65534' # This is the UID used by the prom/prometheus docker image diff --git a/ansible/roles/ceph-prometheus/files/cephmetrics-prometheus.service b/ansible/roles/ceph-prometheus/files/cephmetrics-prometheus.service new file mode 100644 index 0000000..4af2c4b --- /dev/null +++ b/ansible/roles/ceph-prometheus/files/cephmetrics-prometheus.service @@ -0,0 +1,15 @@ +[Unit] +Description=cephmetrics-prometheus +After=docker.service + +[Service] +EnvironmentFile=-/etc/environment +ExecStart=/usr/bin/docker start --attach cephmetrics-prometheus +ExecStop=-/usr/bin/docker stop cephmetrics-prometheus +Restart=always +RestartSec=10s +TimeoutStartSec=120 +TimeoutStopSec=15 + +[Install] +WantedBy=multi-user.target diff --git a/ansible/roles/ceph-prometheus/handlers/main.yml b/ansible/roles/ceph-prometheus/handlers/main.yml new file mode 100644 index 0000000..e1b7dfd --- /dev/null +++ b/ansible/roles/ceph-prometheus/handlers/main.yml @@ -0,0 +1,9 @@ +--- +- name: Service handler + # We use the systemd module here so we can use the daemon_reload feature, + # since we're shipping the .service file ourselves + systemd: + name: cephmetrics-prometheus + daemon_reload: true + enabled: true + state: restarted diff --git 
a/ansible/roles/ceph-prometheus/meta/main.yml b/ansible/roles/ceph-prometheus/meta/main.yml new file mode 100644 index 0000000..56dd016 --- /dev/null +++ b/ansible/roles/ceph-prometheus/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: cephmetrics-common diff --git a/ansible/roles/ceph-prometheus/tasks/main.yml b/ansible/roles/ceph-prometheus/tasks/main.yml new file mode 100644 index 0000000..c0cc0d0 --- /dev/null +++ b/ansible/roles/ceph-prometheus/tasks/main.yml @@ -0,0 +1,37 @@ +--- +- include: merge_vars.yml + tags: + - always + +- name: End play if the prometheus backend isn't desired + meta: end_play + when: backend.metrics != 'mgr' or backend.storage != 'prometheus' + +- include: sanity.yml + tags: + - sanity + +- name: Create prometheus data directory + file: + path: "{{ prometheus.data_dir }}" + state: directory + owner: "{{ prometheus.user_id }}" + +- name: Write config file + template: + src: prometheus.yml + dest: "{{ prometheus.data_dir }}/" + owner: "{{ prometheus.user_id }}" + notify: Service handler + +- include: setup_container.yml + when: prometheus.containerized + +- name: Ship systemd service + copy: + src: cephmetrics-prometheus.service + dest: "/etc/systemd/system/" + owner: root + group: root + mode: 0644 + notify: Service handler diff --git a/ansible/roles/ceph-prometheus/tasks/merge_vars.yml b/ansible/roles/ceph-prometheus/tasks/merge_vars.yml new file mode 120000 index 0000000..5057c16 --- /dev/null +++ b/ansible/roles/ceph-prometheus/tasks/merge_vars.yml @@ -0,0 +1 @@ +../../cephmetrics-common/tasks/merge_vars.yml \ No newline at end of file diff --git a/ansible/roles/ceph-prometheus/tasks/sanity.yml b/ansible/roles/ceph-prometheus/tasks/sanity.yml new file mode 100644 index 0000000..1187000 --- /dev/null +++ b/ansible/roles/ceph-prometheus/tasks/sanity.yml @@ -0,0 +1,5 @@ +--- +- name: Fail when prometheus.containerized is False + fail: + msg: "This role only supports a containerized deployment at this time" + when: not 
prometheus.containerized diff --git a/ansible/roles/ceph-prometheus/tasks/setup_container.yml b/ansible/roles/ceph-prometheus/tasks/setup_container.yml new file mode 100644 index 0000000..25822d7 --- /dev/null +++ b/ansible/roles/ceph-prometheus/tasks/setup_container.yml @@ -0,0 +1,24 @@ +--- +- name: Include ceph-docker + include_role: + name: ceph-docker + allow_duplicates: false + when: prometheus.containerized + tags: docker + +- name: Start docker container + docker_container: + name: cephmetrics-prometheus + image: "{{ prometheus.container_name }}:{{ prometheus.version }}" + command: "--config.file=/prometheus/prometheus.yml" + restart_policy: no + published_ports: '9090:9090' + detach: true + volumes: + - "{{ prometheus.data_dir }}:/prometheus:Z" + networks: + - name: "{{ docker.network_name }}" + user: "{{ prometheus.user_id }}" + keep_volumes: true + pull: true + notify: Service handler diff --git a/ansible/roles/ceph-prometheus/templates/prometheus.yml b/ansible/roles/ceph-prometheus/templates/prometheus.yml new file mode 100644 index 0000000..f570421 --- /dev/null +++ b/ansible/roles/ceph-prometheus/templates/prometheus.yml @@ -0,0 +1,23 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + - job_name: 'ceph' + honor_labels: true + static_configs: +{% for host in groups['mgrs'] %} + - targets: ['{{ host }}:9283'] + labels: + instance: 'ceph_cluster' +{% endfor %} + - job_name: 'node' + static_configs: +{% for host in groups['all'] %} + - targets: ['{{ host }}:9100'] + labels: + instance: "{{ hostvars[host]['ansible_nodename'] }}" +{% endfor %} diff --git a/ansible/roles/ceph-prometheus/tests/test_prometheus.py b/ansible/roles/ceph-prometheus/tests/test_prometheus.py new file mode 100644 index 0000000..bd73bac --- /dev/null +++ b/ansible/roles/ceph-prometheus/tests/test_prometheus.py @@ -0,0 +1,16 @@ +import pytest + +testinfra_hosts = 
['ceph-grafana'] + + +class TestPrometheus(object): + def maybe_skip(self, host): + vars = host.ansible.get_variables() + if vars.get('backend', dict()).get('storage', 'prometheus') != 'prometheus': + pytest.skip() + + def test_port_open(self, host): + """ Is the prometheus port open? """ + self.maybe_skip(host) + socket_spec = "tcp://0.0.0.0:9090" + assert host.socket(socket_spec).is_listening diff --git a/ansible/roles/cephmetrics-common/defaults/main.yml b/ansible/roles/cephmetrics-common/defaults/main.yml new file mode 100644 index 0000000..1c0469b --- /dev/null +++ b/ansible/roles/cephmetrics-common/defaults/main.yml @@ -0,0 +1,60 @@ +--- +defaults: + backend: + metrics: mgr # mgr, cephmetrics + storage: prometheus # prometheus, graphite + containerized_deployment: false + use_epel: false + devel_mode: true + graphite: + service: graphite-web + web_port: "{{ graphite_port | default('8080') }}" + api_port: 8888 + # The unix account running the graphite-web process + unix_user: + yum: apache + apt: _graphite + build_index: + yum: '/usr/bin/graphite-build-index' + apt: '/usr/bin/graphite-build-search-index' + apache_name: + yum: 'httpd' + apt: 'apache2' + apache_conf: + yum: '/etc/httpd/conf/httpd.conf' + apt: '/etc/apache2/apache2.conf' + site_conf: + yum: '/etc/httpd/conf.d/graphite-web.conf' + apt: '/etc/apache2/sites-available/graphite-web.conf' + app_conf: + yum: '/etc/graphite-web/local_settings.py' + apt: '/etc/graphite/local_settings.py' + carbon: + unix_user: + yum: carbon + apt: _graphite + storage_dir: + yum: /var/lib/carbon + apt: /var/lib/graphite + whisper: + retention: + - ['10s', '7d'] + - ['1m', '30d'] + - ['15m', '5y'] + # The firewalld zone that carbon and grafana will use + firewalld_zone: public + devel_packages: + yum: + # unzip is needed to extract the Vonage plugin + - unzip + - grafana + # for dashUpdater.py + - PyYAML + - python-requests + apt: + # unzip is needed to extract the Vonage plugin + - unzip + - grafana + # for dashUpdater.py 
+ - python-yaml + - python-requests diff --git a/ansible/roles/cephmetrics-common/files/cephmetrics-devel.repo b/ansible/roles/cephmetrics-common/files/cephmetrics-devel.repo new file mode 100644 index 0000000..48d8733 --- /dev/null +++ b/ansible/roles/cephmetrics-common/files/cephmetrics-devel.repo @@ -0,0 +1,20 @@ +[cephmetrics] +name=cephmetrics packages for \$basearch +baseurl=https://chacra.ceph.com/r/cephmetrics/master/HEAD/rhel/7/flavors/default/\$basearch +enabled=1 +gpgcheck=0 +type=rpm-md + +[cephmetrics-noarch] +name=cephmetrics noarch packages +baseurl=https://chacra.ceph.com/r/cephmetrics/master/HEAD/rhel/7/flavors/default/noarch +enabled=1 +gpgcheck=0 +type=rpm-md + +[cephmetrics-source] +name=cephmetrics source packages +baseurl=https://chacra.ceph.com/r/cephmetrics/master/HEAD/rhel/7/flavors/default/SRPMS +enabled=1 +gpgcheck=0 +type=rpm-md diff --git a/ansible/roles/cephmetrics-common/tasks/main.yml b/ansible/roles/cephmetrics-common/tasks/main.yml new file mode 100644 index 0000000..258d8d8 --- /dev/null +++ b/ansible/roles/cephmetrics-common/tasks/main.yml @@ -0,0 +1,11 @@ +--- +- include: merge_vars.yml + tags: + - always + +- include: setup_repos.yml + when: + - not containerized_deployment + tags: + - packages + - repos diff --git a/ansible/roles/cephmetrics-common/tasks/merge_vars.yml b/ansible/roles/cephmetrics-common/tasks/merge_vars.yml new file mode 100644 index 0000000..f8dbcd0 --- /dev/null +++ b/ansible/roles/cephmetrics-common/tasks/merge_vars.yml @@ -0,0 +1,5 @@ +--- +- name: Combine default settings and user-defined variables + set_fact: {"{{ item }}": "{% if vars[item] is not defined %}{{ defaults[item] }}{% elif vars[item] is mapping %}{{ defaults[item]|combine(vars[item]|default({})) }}{% else %}{{ vars[item] }}{% endif %}"} + with_items: "{{ defaults.keys() }}" + no_log: true diff --git a/ansible/roles/cephmetrics-common/tasks/setup_repos.yml b/ansible/roles/cephmetrics-common/tasks/setup_repos.yml new file mode 100644 index 
0000000..2f4413b --- /dev/null +++ b/ansible/roles/cephmetrics-common/tasks/setup_repos.yml @@ -0,0 +1,57 @@ +--- +- name: Enable EPEL + yum: + name: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm" + state: "{{ 'present' if use_epel else 'absent' }}" + when: + - ansible_pkg_mgr == "yum" + - devel_mode + +- name: Install cephmetrics development repo + copy: + src: files/cephmetrics-devel.repo + dest: /etc/yum.repos.d/cephmetrics.repo + when: + - ansible_pkg_mgr == "yum" + - not use_epel + - devel_mode + +- name: Remove cephmetrics repo + file: + path: /etc/yum.repos.d/cephmetrics.repo + state: absent + when: + - ansible_pkg_mgr == "yum" + - use_epel + - devel_mode + +- name: Remove old cephmetrics production repo + file: + path: /etc/yum.repos.d/cephmetrics.repo + state: absent + when: + - ansible_pkg_mgr == "yum" + - not devel_mode + +- name: Add custom repos + template: + src: cephmetrics-custom.repo + dest: /etc/yum.repos.d/cephmetrics-custom.repo + when: + - ansible_pkg_mgr == "yum" + - custom_repos is defined + +- name: Remove custom repos + file: + path: /etc/yum.repos.d/cephmetrics-custom.repo + state: absent + when: + - ansible_pkg_mgr == "yum" + - custom_repos is not defined + +- name: Update apt cache + apt: + update_cache: true + when: + - ansible_pkg_mgr == 'apt' + - devel_mode diff --git a/ansible/roles/cephmetrics-common/templates/cephmetrics-custom.repo b/ansible/roles/cephmetrics-common/templates/cephmetrics-custom.repo new file mode 100644 index 0000000..1aa7b32 --- /dev/null +++ b/ansible/roles/cephmetrics-common/templates/cephmetrics-custom.repo @@ -0,0 +1,9 @@ +{% for repo in custom_repos.yum %} +[{{ repo.name }}] +name={{ repo.name }} +baseurl={{ repo.baseurl }} +enabled={{ repo.enabled|default('1') }} +gpgcheck={{ repo.gpgcheck|default('0') }} +type={{ repo.type|default('rpm-md') }} + +{% endfor %} diff --git a/cephmetrics.py b/cephmetrics.py new file mode 100644 index 
class Ceph(object):
    """
    Holder for the role specific collectors that are active on this host.

    The ``mon``, ``rgw``, ``osd`` and ``iscsi`` attributes stay None until
    probe() detects the corresponding role and creates its collector.
    """

    # role name (which is also the attribute name on the instance) mapped to
    # the name of the collector class that serves it
    roles = {
        "mon": "Mon",
        "rgw": "RGW",
        "osd": "OSDs",
        "iscsi": "ISCSIGateway"
    }

    def __init__(self):
        self.cluster_name = None
        self.event_url = None
        self.host_name = get_hostname()

        # no collectors until probe() has been run
        for role in ("mon", "rgw", "osd", "iscsi"):
            setattr(self, role, None)

    def probe(self):
        """
        set up which collector(s) to use
        """
        candidates = (
            ("mon", Mon),
            ("rgw", RGW),
            ("osd", OSDs),
            ("iscsi", ISCSIGateway),
        )

        for role, collector_cls in candidates:
            if collector_cls.probe():
                setattr(self, role, collector_cls(self, self.cluster_name))

    def get_stats(self):
        """
        gather the stats from every collector that has been set up
        :return: (dict) stats indexed by role name; roles without a collector
                 are omitted
        """
        stats = {}

        # explicit tuple keeps the mon, rgw, osd, iscsi call order
        for role in ("mon", "rgw", "osd", "iscsi"):
            collector = getattr(self, role)
            if collector:
                stats[role] = collector.get_stats()

        return stats
def setup_module_logging(log_level):
    """
    Configure file based logging for the cephmetrics plugin. The log file
    is opened with filemode 'w', so it is truncated on every call.
    :param log_level: (str) 'debug' or 'info'. Any other value falls back to
                      debug, the same default configure_callback applies
                      when LogLevel is not given
    :return: None
    """

    level = {"debug": logging.DEBUG,
             "info": logging.INFO}

    logging.basicConfig(filename='/var/log/collectd-cephmetrics.log',
                        format='%(asctime)s - %(levelname)-7s - '
                               '[%(filename)s:%(lineno)s:%(funcName)s()] - '
                               '%(message)s',
                        filemode='w',
                        # an unknown level used to resolve to None, which
                        # left the root logger at its default level
                        level=level.get(log_level, logging.DEBUG))
+ # detected an error, let's flag it to the collectd log + msg_text = ",".join(collector.error_msgs) + + collectd.error("cephmetrics error: {} - {}".format(collector._name, + msg_text)) + + # reset the collector instance's error tracking + collector.error = False + del collector.error_msgs[:] + + +if __name__ == '__main__': + + # run interactively or maybe test the code + + pass + +else: + + CEPH = Ceph() + + collectd.register_config(configure_callback) + collectd.register_read(read_callback) diff --git a/cephmetrics.spec.in b/cephmetrics.spec.in new file mode 100644 index 0000000..c03109a --- /dev/null +++ b/cephmetrics.spec.in @@ -0,0 +1,137 @@ +%define debug_package %{nil} + +%{!?_selinux_policy_version: %global _selinux_policy_version %(sed -e 's,.*selinux-policy-\\([^/]*\\)/.*,\\1,' /usr/share/selinux/devel/policyhelp 2>/dev/null)} + +Name: cephmetrics +Version: @VERSION@ +Release: @RELEASE@%{?dist} +Summary: Monitoring service for Ceph clusters + +License: GPLv3 +URL: https://github.com/ceph/cephmetrics +Source0: cephmetrics-@VERSION@.zip +Source1: vonage-status-panel-1.0.4.zip +Source2: grafana-piechart-panel-1.1.5.zip + +# SELinux deps +BuildRequires: checkpolicy +BuildRequires: selinux-policy-devel +BuildRequires: /usr/share/selinux/devel/policyhelp +BuildRequires: hardlink +Requires: policycoreutils, libselinux-utils +Requires(post): selinux-policy >= %{_selinux_policy_version}, policycoreutils +Requires(postun): policycoreutils + +Requires: PyYAML +Requires: graphite-web +Requires: python-carbon +Requires: python-requests +Requires: cephmetrics-grafana-plugins = %{version}-%{release} + +%description +The monitoring service with web frontend for Ceph storage clusters providing several statistical data graphed by grafana. + + +%package grafana-plugins +Summary: Vonage plugin for graphana +Requires: grafana +License: ASL 2.0 +%description grafana-plugins +The vonage status panel and piechart panel for grafana web server. 
+ + +%package collectors +Summary: Ceph metrics collectors +Requires: collectd +Requires: collectd-python +Requires: libsemanage-python +%description collectors +The collectors for Ceph implemented with help of statistics collection daemon collectd. + + +%package ansible +Summary: Ansible playbooks for Ceph metrics +Requires: ceph-ansible +Requires: python-netaddr +%description ansible +Ansible playbooks for Ceph metrics + + +%prep +%setup -q +# Disable devel_mode in the rpms +patch -p1 < patches/0001-ansible-Disable-devel_mode.patch +# Unzip grafana plugins +unzip %SOURCE1 +mv -f Vonage* cephmetrics-vonage +unzip %SOURCE2 +mv -f grafana-piechart-panel* cephmetrics-piechart + + +%build +make -f /usr/share/selinux/devel/Makefile cephmetrics.pp + +# Change the devel_mode defaults +sed -i -e 's/devel_mode: true/devel_mode: false/' ansible/roles/*/defaults/main.yml + +# Support light mode better +sed -i -e 's/green/rgb(1,167,1)/g' cephmetrics-vonage/dist/css/status_panel.css + + +%install +# Install dashUpdater.py +install -d %{buildroot}%{_libexecdir}/cephmetrics +install -m 755 dashUpdater.py %{buildroot}%{_libexecdir}/cephmetrics/ +install -d %{buildroot}%{_datadir}/cephmetrics/dashboards +install -m 644 dashboards/current/* %{buildroot}%{_datadir}/cephmetrics/dashboards/ + +# Install vonage and piechart plugin +install -d %{buildroot}%{_localstatedir}/lib/grafana/plugins/ +cp -r cephmetrics-vonage %{buildroot}%{_localstatedir}/lib/grafana/plugins/ +cp -r cephmetrics-piechart %{buildroot}%{_localstatedir}/lib/grafana/plugins/ + +# Install collectors +install -d %{buildroot}%{_libdir}/collectd/cephmetrics/collectors +install -m 755 cephmetrics.py %{buildroot}%{_libdir}/collectd/cephmetrics +install -m 644 collectors/* %{buildroot}%{_libdir}/collectd/cephmetrics/collectors + +# Install ansible playbooks +install -d %{buildroot}%{_datadir} +cp -L -r ansible %{buildroot}%{_datadir}/cephmetrics-ansible + +# Install SELinux +install -d 
class BaseCollector(object):
    """
    Common behaviour shared by the role specific collectors: version
    detection, admin socket access and error tracking.
    """

    # collector class name -> binary whose presence signifies that role
    class_to_cmd = {
        "Mon": "ceph-mon",
        "RGW": "radosgw",
        "OSDs": "ceph-osd",
        "ISCSIGateway": "gwcli"
    }

    def __init__(self, parent, cluster_name, admin_socket=None):
        """
        :param parent: object that owns this collector
        :param cluster_name: (str) name of the ceph cluster
        :param admin_socket: (str) default admin socket path, may be None
        """
        self._name = self.__class__.__name__
        self._parent = parent
        self.cluster_name = cluster_name
        self.admin_socket = admin_socket
        self.version = self.get_version()
        self.error = False
        self.error_msgs = []

        self.logger = logging.getLogger('cephmetrics')

        self.logger.info("ceph version for {}: {}".format(self._name,
                                                          self.version))

    def _admin_socket(self, cmds=None, socket_path=None):
        """
        Send a command to a ceph admin socket and decode the json response
        :param cmds: (list) command words, defaults to ['perf', 'dump']
        :param socket_path: (str) socket to use instead of self.admin_socket
        :return: (dict) decoded response, empty if the socket is missing or
                 the call failed
        """

        adm_socket = self.admin_socket if not socket_path else socket_path

        if not cmds:
            cmds = ['perf', 'dump']

        start = time.time()

        resp = {}
        # adm_socket is None when no socket has been configured, and
        # os.path.exists(None) raises instead of returning False
        if adm_socket and os.path.exists(adm_socket):
            try:
                response = admin_socket(adm_socket, cmds,
                                        format='json')
            except RuntimeError as e:
                # exceptions have no .message attribute on python 3
                err_text = str(e)
                self.logger.error("admin_socket error: {}".format(err_text))
                self.error = True
                self.error_msgs = [err_text]
            else:
                resp = json.loads(response)

        end = time.time()

        self.logger.debug("admin_socket call '{}' : "
                          "{:.3f}s".format(' '.join(cmds),
                                           (end - start)))

        return resp

    def get_version(self):
        """
        Although the version number is v.r.m based, this isn't a float so it
        can't be stored as a number, so the version returned is just the
        version.release components (i.e. looks like a float!)
        :return: (float) version number (v.r format), or 0 when the version
                 can not be determined
        """
        # version command returns output like this
        # ceph version 10.2.2-15.el7cp (60cd52496ca02bdde9c2f4191e617f75166d87b6)

        cmd = BaseCollector.class_to_cmd.get(self._name, 'ceph')
        vers_output = os_cmd('{} -v'.format(cmd))
        if not vers_output:
            return 0

        # command output captured via subprocess is bytes on python 3
        if isinstance(vers_output, bytes):
            vers_output = vers_output.decode('utf-8')

        try:
            return float('.'.join(vers_output.split()[2].split('.')[:2]))
        except (IndexError, ValueError):
            # output didn't look like "ceph version v.r.m ..."
            return 0

    @classmethod
    def probe(cls):
        """
        Look for the relevant binary to signify a specific ceph role
        :return: (bool) showing whether the binary was found or not
        """

        cmd = BaseCollector.class_to_cmd.get(cls.__name__)
        if cmd is None:
            # class has no binary associated with it, so nothing to find
            return False

        return cmd_exists(cmd)

    def get_stats(self):
        """
        Placeholder implementation
        :return: (dict) always empty
        """

        return {}
def todict(obj):
    """
    convert an object to a dict representation
    :param obj: (object) object to examine, to extract variables/values from
    :return: (dict) representation of the given object. Attributes whose
             name starts with '_' are skipped, and attribute values that
             have their own __dict__ are converted recursively
    """
    data = {}
    # items() exists on python 2 and 3, iteritems() is python 2 only
    for key, value in obj.__dict__.items():

        if key.startswith('_'):
            continue

        try:
            data[key] = todict(value)
        except AttributeError:
            # value has no __dict__ (str, int, list ...), keep it as is
            data[key] = value

    return data
class IOstat(object):
    """
    Per device I/O statistics, derived from two successive samples of raw
    counters held in _previous and _current.
    """

    # names given to the raw counters; position in this list is the index
    # used for the _previous/_current sample lists
    raw_metrics = [
        "_reads",
        "_reads_mrgd",
        "_sectors_read",
        "_read_ms",
        "_writes",
        "_writes_mrgd",
        "_sectors_written",
        "_write_ms",
        "_current_io",
        "_ms_active_io",
        "_ms_active_io_w"
    ]

    sector_size = 512

    metrics = {
        "iops": ("iops", "gauge"),
        "r_iops": ("r_iops", "gauge"),
        "w_iops": ("w_iops", "gauge"),
        "bytes_per_sec": ("bytes_per_sec", "gauge"),
        "r_bytes_per_sec": ("r_bytes_per_sec", "gauge"),
        "w_bytes_per_sec": ("w_bytes_per_sec", "gauge"),
        "util": ("util", "gauge"),
        "await": ("await", "gauge"),
        "r_await": ("r_await", "gauge"),
        "w_await": ("w_await", "gauge"),
    }

    def __init__(self):
        self._previous = []
        self._current = []

        # Seed the metrics we're interested in
        for ctr in IOstat.metrics:
            setattr(self, ctr, 0)

    def __str__(self):
        s = '\n- IOstat object:\n'
        for key in sorted(vars(self)):
            s += '\t{} ... {}\n'.format(key, getattr(self, key))
        return s

    def _calc_raw_delta(self):
        """
        Store the difference between the current and previous sample as an
        attribute for each raw metric. Without a previous sample there is
        nothing to compute yet, so every delta is 0
        """
        have_previous = bool(self._previous)

        for ptr, key in enumerate(IOstat.raw_metrics):
            if have_previous:
                delta = int(self._current[ptr]) - int(self._previous[ptr])
            else:
                delta = 0
            setattr(self, key, delta)

    def compute(self, sample_interval):
        """
        Calculate the iostats for this device
        :param sample_interval: interval between the two samples. The
                                metrics are only updated when it is > 0
        """

        self._calc_raw_delta()

        if sample_interval > 0:
            interval_ms = sample_interval * 1000
            total_io = self._reads + self._writes
            self.util = float(self._ms_active_io) / interval_ms * 100
            self.iops = int(total_io) / sample_interval
            self.r_iops = int(self._reads) / sample_interval
            self.w_iops = int(self._writes) / sample_interval
            # 'await' is a reserved word from python 3.7, so "self.await"
            # is a syntax error there - set the attribute by name instead
            setattr(self, 'await',
                    float(self._write_ms + self._read_ms) / total_io
                    if total_io > 0 else 0)
            self.w_await = float(
                self._write_ms) / self._writes if self._writes > 0 else 0
            self.r_await = float(
                self._read_ms) / self._reads if self._reads > 0 else 0
            self.r_bytes_per_sec = (float(
                self._sectors_read * IOstat.sector_size)) / sample_interval
            self.w_bytes_per_sec = (float(
                self._sectors_written * IOstat.sector_size)) / sample_interval
            self.bytes_per_sec = self.r_bytes_per_sec + self.w_bytes_per_sec
fread("/sys/block/{}/size".format(self._base_dev)) + if size.isdigit(): + size = int(size) * 512 + else: + size = 0 + return size + + def _get_rota(self): + rota = fread("/sys/block/{}/queue/rotational".format(self._base_dev)) + if rota.isdigit(): + # 0 = flash/nvme/ssd, 1 = HDD + return rota + else: + # default to a HDD response + return 1 + + @staticmethod + def get_base_dev(dev_name): + + # for intelcas devices, just use the device name as is + if dev_name.startswith('intelcas'): + device = dev_name + elif dev_name.startswith('nvme'): + if 'p' in dev_name: + device = dev_name[:(dev_name.index('p'))] + else: + device = dev_name + else: + # default strip any numeric ie. sdaa1 -> sdaa + device = filter(lambda ch: ch.isalpha(), dev_name) + + return device + diff --git a/collectors/iscsi.py b/collectors/iscsi.py new file mode 100644 index 0000000..3c85a1e --- /dev/null +++ b/collectors/iscsi.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python2 + +# requires python-rtslib_fb for LIO interaction +# +# NB. the rtslib_fb module is dynamically loaded by the ISCSIGateway +# class instantiation. 
class Client(object):
    """
    An iSCSI client, identified by its iqn, and the LUN objects that are
    tracked for it.
    """

    def __init__(self, iqn):
        """
        :param iqn: (str) iqn of the client
        """
        self.iqn = iqn
        # display name is the iqn with every '.' replaced by '-'
        self.name = iqn.replace('.', '-')
        self.luns = {}
        self.lun_count = 0
        self._cycle = 0

    def dump(self):
        """
        Merge the dump() output of every LUN held for this client
        :return: (dict) indexed by client name, holding the merged "luns"
                 dict and "lun_count" which is the number of entries in it
        """
        lun_info = {}
        for lun in self.luns.values():
            lun_info.update(lun.dump())

        return {self.name: {"luns": lun_info,
                            "lun_count": len(lun_info)}
                }
def __init__(self, *args, **kwargs):
    """
    Set up the gateway collector: run the BaseCollector initialisation,
    attach to rtslib and zero the aggregate counters.
    :raises ImportError: when rtslib_fb is not available
    """
    BaseCollector.__init__(self, *args, **kwargs)

    # Since the module can be imported by a parent class but not
    # instantiated, the rtslib import is deferred until an instance of the
    # class is created. This keeps the parent module simple and more
    # importantly generic.
    # The import is done unconditionally: when rtslib_fb.root has already
    # been loaded this is only a lookup, whereas skipping it would leave
    # RTSRoot unbound for the call below.
    from rtslib_fb.root import RTSRoot

    self._root = RTSRoot()

    self.clients = {}
    self.cycle = 0

    self.iops = 0
    self.read_bytes_per_sec = 0
    self.write_bytes_per_sec = 0
    self.total_bytes_per_sec = 0
def prune(self):
    """
    drop child objects held by the instance, that are no longer in the
    iSCSI config i.e. don't report on old information

    A client or LUN is dropped when its _cycle marker differs from
    self.cycle.
    """

    # Loop over snapshots of the dicts: deleting an entry from a dict that
    # is being iterated raises "dictionary changed size during iteration"
    for client_name, client in list(self.clients.items()):

        for lun_name, lun in list(client.luns.items()):
            if lun._cycle != self.cycle:
                # drop the lun entry
                self.logger.debug("pruning LUN '{}'".format(lun_name))

                del client.luns[lun_name]

        if client._cycle != self.cycle:
            # drop the client entry
            self.logger.debug("pruning client '{}'".format(client_name))
            del self.clients[client_name]
class CephState(object):
    """
    Cluster health status together with the list of health issue summaries
    that accompany it.
    """

    def __init__(self, status=None, summary_list=None):
        """
        :param status: health status value, stored as given
        :param summary_list: (list) of dicts, each optionally holding a
                             'summary' text. None is treated as empty
        """
        self.status = status

        if summary_list is None:
            summary_list = []

        # create a list of health issues, ignoring the warning that Luminous
        # issues
        summary_data = [health_issue.get('summary', '')
                        for health_issue in summary_list]
        self.summary = [health_desc for health_desc in summary_data
                        if 'update your health monitoring' not in health_desc]

    def update(self, state_object):
        """
        Take over the status and summary of another state object
        :param state_object: object providing status and summary attributes
        """
        self.status = state_object.status
        self.summary = state_object.summary

    @property
    def status_items(self):
        """
        The summary text will track pgs objects during recovery or backfill
        operations, so every status could be different from the last as these
        counts change. this function removes the int(s) from the status text
        to reduce the frequency that a status check would generate an event
        :return: items (set) unique set of status items
        """
        priority_errors = ['mons', 'osds', 'flag(s)']

        items = set()
        for summary_text in self.summary:
            if any(prio_field in summary_text
                   for prio_field in priority_errors):
                # priority health messages kept as is
                items.add(summary_text)
            else:
                # other messages get their 'counts' removed. The text is
                # rebuilt with join since filter() over a str only returns
                # a str on python 2
                new_text = ''.join(ch for ch in summary_text
                                   if not ch.isdigit())
                items.add(new_text)

        return items

    @property
    def status_str(self):
        """
        :return: (str) the status followed by the comma separated summaries
        """
        return "{} : {}".format(self.status,
                                ','.join(self.summary))
+ cluster_metrics = { + "num_mon": ("num_mon", "gauge"), + "num_mon_quorum": ("num_mon_quorum", "gauge"), + "num_rbds": ("num_rbds", "gauge"), + "num_osd_hosts": ("num_osd_hosts", "gauge"), + "num_osd": ("num_osd", "gauge"), + "num_osd_up": ("num_osd_up", "gauge"), + "num_osd_in": ("num_osd_in", "gauge"), + "osd_epoch": ("osd_epoch", "gauge"), + "osd_bytes": ("osd_bytes", "gauge"), + "osd_bytes_used": ("osd_bytes_used", "gauge"), + "osd_bytes_avail": ("osd_bytes_avail", "gauge"), + "num_pool": ("num_pool", "gauge"), + "num_pg": ("num_pg", "gauge"), + "num_pg_active_clean": ("num_pg_active_clean", "gauge"), + "num_pg_active": ("num_pg_active", "gauge"), + "num_pg_peering": ("num_pg_peering", "gauge"), + "num_object": ("num_object", "gauge"), + "num_object_degraded": ("num_object_degraded", "gauge"), + "num_object_misplaced": ("num_object_misplaced", "gauge"), + "num_object_unfound": ("num_object_unfound", "gauge"), + "num_bytes": ("num_bytes", "gauge"), + "num_mds_up": ("num_mds_up", "gauge"), + "num_mds_in": ("num_mds_in", "gauge"), + "num_mds_failed": ("num_mds_failed", "gauge"), + "mds_epoch": ("mds_epoch", "gauge"), + "health": ("health", "gauge") + } + + pool_client_metrics = { + 'bytes_sec': ("bytes_sec", "gauge"), + 'op_per_sec': ("op_per_sec", "gauge"), + 'read_bytes_sec': ("read_bytes_sec", "gauge"), + 'write_op_per_sec': ("write_op_per_sec", "gauge"), + 'write_bytes_sec': ("write_bytes_sec", "gauge"), + 'read_op_per_sec': ("read_op_per_sec", "gauge") + } + + pool_recovery_metrics = { + "recovering_objects_per_sec": ("recovering_objects_per_sec", "gauge"), + "recovering_bytes_per_sec": ("recovering_bytes_per_sec", "gauge"), + "recovering_keys_per_sec": ("recovering_keys_per_sec", "gauge"), + "num_objects_recovered": ("num_objects_recovered", "gauge"), + "num_bytes_recovered": ("num_bytes_recovered", "gauge"), + "num_keys_recovered": ("num_keys_recovered", "gauge") + } + + mon_states = { + "mon_status": ("mon_status", "gauge") + } + + all_metrics = 
merge_dicts(pool_recovery_metrics, pool_client_metrics) + all_metrics = merge_dicts(all_metrics, cluster_metrics) + all_metrics = merge_dicts(all_metrics, mon_states) + + def __init__(self, *args, **kwargs): + BaseCollector.__init__(self, *args, **kwargs) + + self.admin_socket = ('/var/run/ceph/{}-mon.' + '{}.asok'.format(self.cluster_name, + get_hostname())) + + self.last_state = CephState() + + self.ip_names = get_names() + + if self.version < 12: + self.get_mon_health = self._mon_health + else: + self.get_mon_health = self._mon_health_new + + + + if self._parent: + self.logger.debug("Event URL is : " + "{}".format(self._parent.event_url)) + + def _mon_command(self, cmd_request): + """ Issue a command to the monitor """ + + buf_s = '{}' + conf_file = "/etc/ceph/{}.conf".format(self.cluster_name) + + start = time.time() + with rados.Rados(conffile=conf_file) as cluster: + cmd = {'prefix': cmd_request, 'format': 'json'} + rc, buf_s, out = cluster.mon_command(json.dumps(cmd), b'') + end = time.time() + + self.logger.debug("_mon_command call '{}' :" + " {:.3f}s".format(cmd_request, + (end - start))) + + return json.loads(buf_s) + + @staticmethod + def get_feature_state(summary_data, pg_states): + """ + Look at the summary list to determine the state of RADOS features + :param summary_data: (list) summary data from a ceph health command + :return: (dict) dict indexed by feature + 0 Inactive, 1 Active, 2 Disabled + """ + feature_lookup = {"noscrub": "scrub", + "nodeep-scrub": "deep_scrub", + "norecover": "recovery", + "nobackfill": "backfill", + "norebalance": "rebalance", + "noout": "out", + "nodown": "down"} + + # Start with all features inactive i.e. 
enabled + feature_state = {feature_lookup.get(key): 0 for key in feature_lookup} + + for summary in summary_data: + summary_desc = summary.get('summary') + if "flag(s) set" in summary_desc: + flags = summary_desc.replace(' flag(s) set', '').split(',') + for disabled_feature in flags: + if disabled_feature in feature_lookup: + feature = feature_lookup.get(disabled_feature) + feature_state[feature] = 2 # feature disabled + + # Now use the current pg state names to determine whether a feature is + # active - if not it stays set to '0', which means inactive + pg_state_names = [pg_state.get('name') for pg_state in pg_states] + for pg_state in pg_state_names: + states = pg_state.split('+') + if 'recovering' in states: + feature_state['recovery'] = 1 # Active + continue + if 'backfilling' in states: + feature_state['backfill'] = 1 + continue + if 'deep' in states: + feature_state['deep_scrub'] = 1 + continue + if 'scrubbing' in states: + feature_state['scrub'] = 1 + + return feature_state + + @classmethod + def check_stuck_pgs(cls, summary_list): + bad_pg_words = ['pgs', 'stuck', 'inactive'] + stuck_pgs = 0 + for summary_data in summary_list: + if summary_data.get('severity') != 'HEALTH_ERR': + continue + if all(trigger in summary_data.get('summary') + for trigger in bad_pg_words): + stuck_pgs = int(summary_data.get('summary').split()[0]) + + return stuck_pgs + + def _mon_health_new(self, cluster_data): + + cluster, health_data = self._mon_health_common(cluster_data) + + mon_status_output = self._mon_command('mon_status') + quorum_list = mon_status_output.get('quorum') + mon_list = mon_status_output.get('monmap').get('mons') + mon_status = {} + for mon in mon_list: + state = 0 if mon.get('rank') in quorum_list else 4 + mon_status[mon.get('name')] = state + + cluster['mon_status'] = mon_status + + self.manage_event(health_data.get('status'), + health_data.get('summary', []), + mon_status) + + return cluster + + def _mon_health_common(self, cluster_data): + + # for v12 
(Luminous and beyond) add the following setting to + # ceph.conf "mon_health_preluminous_compat=true" + # this will provide the same output as pre-luminous + + health_data = {} + cluster = {} + + pg_data = self._mon_command("pg stat") + health_data = self._mon_command("health") + health_text = health_data.get('status', + health_data.get('overall_status', '')) + + cluster = {Mon.cluster_metrics[k][0]: cluster_data[k] + for k in cluster_data} + + health_num = Mon.health.get(health_text, 16) + + cluster['health'] = health_num + + pg_states = pg_data.get('num_pg_by_state') # list of dict name,num + health_summary = health_data.get('summary', []) # list of issues + cluster['num_pgs_stuck'] = Mon.check_stuck_pgs(health_summary) + cluster['features'] = Mon.get_feature_state(health_summary, + pg_states) + + self.logger.debug( + 'Features:{}'.format(json.dumps(cluster['features']))) + + return cluster, health_data + + def get_cluster_state(self): + return self._admin_socket().get('cluster', {}) + + def _mon_health(self, cluster_data): + + cluster, health_data = self._mon_health_common(cluster_data) + + services = health_data.get('health').get('health_services') + mon_status = {} + for svc in services: + if 'mons' in svc: + # Each monitor will have a numeric value denoting health + mon_status = {mon.get('name'): Mon.health.get(mon.get('health')) + for mon in svc.get('mons')} + + cluster['mon_status'] = mon_status + + self.manage_event(health_data.get('overall_status'), + health_data.get('summary', []), + mon_status) + + return cluster + + def manage_event(self, health_text, health_summary, mon_status): + + if not self._parent: + # invoked without a parent, as part of system tests + return + elif not self._parent.event_url: + # event generation skipped + return + + candidates = [mon_name for mon_name in sorted(mon_status) + if mon_status.get(mon_name) == 0] + + if candidates: + sender = candidates[0] + if sender not in self.ip_names: + # only one mon should send, so if that's 
@staticmethod
def post_event(url, tag_name, event_message):
    """
    POST a "Ceph Health" event to the given events endpoint.

    :param url: (str) URL of the events endpoint
    :param tag_name: (str) tag to attach to the event
    :param event_message: (str) text describing the cluster state
    :return: (int) http status code of the request, or 500 if the
             endpoint could not be reached
    """

    headers = {"Content-Type": "application/json"}

    # Serialise with json.dumps rather than string formatting, so quotes,
    # backslashes or newlines in the message cannot produce invalid JSON
    payload = json.dumps({"what": "Ceph Health",
                          "tags": tag_name,
                          "data": event_message})

    try:
        r = requests.post(url,
                          headers=headers,
                          data=payload)
    except requests.ConnectionError:
        # if we hit this, the endpoint wasn't there (graphite web was not
        # accessible) so identify that issue as a server error (500)
        return 500

    return r.status_code
def display_names(self, metric_format, metrics):
    """
    Re-key a metrics dict using the static descriptions.

    :param metric_format: (dict) raw metric name -> tuple whose first item
                          is the display name
    :param metrics: (dict) raw metric name -> value; may be None or empty
    :return: (dict) display name -> value, empty when no metrics are given
    """
    if not metrics:
        return {}

    renamed = {}
    for raw_name in metrics:
        display_name = metric_format[raw_name][0]
        renamed[display_name] = metrics[raw_name]
    return renamed
@staticmethod
def _select_pools(pools, mons):
    """
    Determine the pools this mon should scan based on its name. We select
    pools starting at an offset into the pool list, and then repeat at an
    interval set by the number of mons in the configuration. This splits
    up the pools we have, so each mon looks at a discrete set of pools
    instead of all mons performing all scans.

    :param pools: (list) rados pool names
    :param mons: (list) monitor names from ceph health
    :return: (list) of pools this monitor should scan. empty list if the
             monitor name mismatches - so no scans done
    """
    try:
        offset = mons.index(get_hostname())
    except ValueError:
        # this host's name is not in the monitor list?
        # twilight zone moment
        return []

    # Extended slicing picks every len(mons)-th pool starting at this mon's
    # position; unlike xrange it works on both Python 2 and Python 3
    return list(pools[offset::len(mons)])
Each mon collector + will scan a subset of the pools to distribute the load using the + RBSScanner class + :param monitors: (dict) monitor names and states + :return total_rbs: (int) total rbd images found across pools + """ + + pool_list = self.get_rbd_pools() + mon_list = sorted(monitors.keys()) + my_pools = Mon._select_pools(pool_list, mon_list) + self.logger.debug("Pools to be scanned on this mon" + " : {}".format(','.join(my_pools))) + threads = [] + + start = time.time() + + for pool in my_pools: + thread = RBDScanner(self.cluster_name, pool) + thread.start() + threads.append(thread) + + # wait for all threads to complete + for thread in threads: + thread.join(1) + + end = time.time() + self.logger.debug("rbd scans {:.3f}s".format((end - start))) + + total_rbds = sum([thread.num_rbds for thread in threads]) + self.logger.debug("total rbds found : {}".format(total_rbds)) + + for thread in threads: + del thread + + return total_rbds + + def get_stats(self): + """ + method associated with the plugin callback to gather the metrics + :return: (dict) metadata describing the state of the mon/osd's etc + """ + + start = time.time() + + # Attempt to read the admin socket for cluster data + cluster_data = self.get_cluster_state() + + if cluster_data: + + # read from the admin socket was OK, so process the data + cluster_state = self.get_mon_health(cluster_data) + pool_stats = self._get_pool_stats() + df_stats = self._get_df_stats() + for df_obj in df_stats['pools']: + pool_name = df_obj['name'] + pool_stats[pool_name] = merge_dicts( + pool_stats[pool_name], df_obj['stats']) + num_osd_hosts, osd_states = self._get_osd_states() + + cluster_state['num_osd_hosts'] = num_osd_hosts + cluster_state['num_rbds'] = self._get_rbds(cluster_state['mon_status']) + + all_stats = merge_dicts(cluster_state, {"pools": pool_stats, + "osd_state": osd_states}) + else: + # problem reading from the admin socket, record it in cephmetrics + # log and set the object's error flag so it can be 
class OSDstats(object):
    """
    Per-OSD performance and capacity figures.

    Latency attributes are computed as the delta between two consecutive
    samples of the cumulative counters passed to update().
    """

    osd_capacity = {
        "stat_bytes": ("stat_bytes", "gauge"),
        "stat_bytes_used": ("stat_bytes_used", "gauge"),
        "stat_bytes_avail": ("stat_bytes_avail", "gauge")
    }

    # names of the latency counters tracked for each object store type
    perf_metrics = {
        "filestore": {
            "journal_latency",
            "commitcycle_latency",
            "apply_latency",
            "queue_transaction_latency_avg"
        },
        "bluestore": {
            "submit_lat",
            "throttle_lat",
            "state_aio_wait_lat",
            "kv_flush_lat",
            "kv_commit_lat"
        }
    }

    def __init__(self, osd_type='filestore'):
        self._current = {}
        self._previous = {}
        self._osd_type = osd_type
        self.osd_type = Disk.osd_types[osd_type]
        self.osd_percent_used = 0

    def update(self, stats):
        """
        update the objects attributes based on the 'stats' dict
        :param stats: (dict) containing performance ('filestore' or
                      'bluestore') and capacity info ('osd')
        :return: None
        """

        if self._current:
            self._previous = self._current
        self._current = stats[self._osd_type]

        for attr in OSDstats.perf_metrics[self._osd_type]:

            current = self._current.get(attr)
            if current is None:
                # skip if the attribute needed isn't available
                # eg. early versions of bluestore didn't have a 'stable'
                # set of perf counters
                continue

            # no usable previous value, so the latency stays at 0
            val = 0
            previous = self._previous.get(attr) if self._previous else None
            if previous is not None:
                d_sum = current.get('sum') - previous.get('sum')
                d_avgcount = current.get('avgcount') - \
                    previous.get('avgcount')

                if d_sum != 0 and d_avgcount != 0:
                    val = float(d_sum) / d_avgcount

            setattr(self, attr, val)

        for attr in stats['osd']:
            setattr(self, attr, stats['osd'].get(attr))

        total = getattr(self, 'stat_bytes', None)
        used = getattr(self, 'stat_bytes_used', None)
        if total and used is not None:
            self.osd_percent_used = math.ceil((float(used) / total) * 100)
        else:
            # capacity unknown or zero - avoid dividing by zero
            self.osd_percent_used = 0
def _fetch_osd_stats(self, osd_id, osd_type='filestore'):
    """
    Read the perf and capacity counters for one OSD from its admin socket.

    :param osd_id: id of the OSD, used to derive the admin socket name
    :param osd_type: (str) object store type, selects the perf counters
    :return: (dict) {osd_type: {...}, 'osd': {...}}, or None when the
             socket is missing or the response lacks the needed sections
    """

    # NB: osd stats are cumulative

    osd_socket_name = '/var/run/ceph/{}-osd.{}.asok'.format(self.cluster_name,
                                                            osd_id)

    if not os.path.exists(osd_socket_name):
        # all OSD's should expose an admin socket, so if it's missing
        # the osd hasn't initialized properly or it's gone down
        msg = "Socket file missing for OSD {}".format(osd_id)
        self.logger.error(msg)
        self.error = True
        # error_msgs (list) is what the Mon and RGW collectors set;
        # error_msg is kept for anything still reading the old attribute
        self.error_msgs = [msg]
        self.error_msg = msg
        return

    self.logger.debug("fetching osd stats for osd {}".format(osd_id))
    resp = self._admin_socket(socket_path=osd_socket_name) or {}

    perf_stats = resp.get(osd_type)
    osd_stats = resp.get('osd')
    if perf_stats is None or osd_stats is None:
        # failed or incomplete admin socket read - returning None lets the
        # caller report the stats as unavailable instead of crashing
        self.logger.warning("Incomplete admin socket response for "
                            "osd {}".format(osd_id))
        return

    stats = {}
    stats[osd_type] = {key_name: perf_stats.get(key_name)
                       for key_name in OSDstats.perf_metrics[osd_type]}

    # Add disk usage stats
    stats['osd'] = {key_name: osd_stats.get(key_name)
                    for key_name in OSDstats.osd_capacity.keys()}

    return stats
+ + osd_indicators = {'var', 'lib', 'osd'} + + for mnt in freadlines('/proc/mounts'): + items = mnt.split(' ') + dev_path, path_name = items[:2] + if path_name.startswith('/var/lib'): + # take a close look since this is where ceph osds usually + # get mounted + dirs = set(path_name.split('/')) + if dirs.issuperset(osd_indicators): + + # get the osd_id from the name is the most simple way + # to get the id, due to naming conventions. If this fails + # though, plan 'b' is the whoami file + osd_id = path_name.split('-')[-1] + if not osd_id.isdigit(): + osd_id = fread(os.path.join(path_name, 'whoami')) + + if osd_id not in self.osd: + osd_type = OSDs.get_osd_type(path_name) + self.osd[osd_id] = OSDstats(osd_type=osd_type) + self.osd_id_list.append(osd_id) + + osd_type = self.osd[osd_id]._osd_type + if osd_type == 'filestore': + if dev_path.startswith('/dev/mapper'): + encrypted = 1 + uuid = dev_path.split('/')[-1] + partuuid = '/dev/disk/by-partuuid/{}'.format(uuid) + dev_path = os.path.realpath(partuuid) + osd_device = dev_path.split('/')[-1] + else: + encrypted = 0 + osd_device = dev_path.split('/')[-1] + + elif osd_type == 'bluestore': + block_link = os.path.join(path_name, 'block') + osd_path = os.path.realpath(block_link) + osd_device = osd_path.split('/')[-1] + encrypted = 0 + else: + raise ValueError("Unknown OSD type encountered") + + # if the osd_id hasn't been seem neither has the + # disk + self.osd[osd_device] = Disk(osd_device, + path_name=path_name, + osd_id=osd_id, + in_osd_type=osd_type, + encrypted=encrypted) + self.dev_lookup[osd_device] = 'osd' + self.osd_count += 1 + + if osd_type == 'filestore': + journal_link = os.path.join(path_name, 'journal') + else: + journal_link = os.path.join(path_name, 'block.wal') + + if os.path.exists(journal_link): + link_tgt = os.readlink(journal_link) + if link_tgt.startswith('/dev/mapper'): + encrypted = 1 + else: + encrypted = 0 + + partuuid_path = os.path.join('/dev/disk/by-partuuid', + link_tgt.split('/')[-1]) + 
jrnl_path = os.path.realpath(partuuid_path) + jrnl_dev = jrnl_path.split('/')[-1] + + if jrnl_dev not in self.osd: + self.jrnl[jrnl_dev] = Disk(jrnl_dev, + osd_id=osd_id, + in_osd_type=osd_type, + encrypted=encrypted) + + self.dev_lookup[jrnl_dev] = 'jrnl' + + else: + # No journal or WAL link..? + pass + + def _stats_lookup(self): + """ + Grab the disk stats from /proc/diskstats, and the key osd perf dump + counters + """ + + now = time.time() + interval = int(now) - self.timestamp + self.timestamp = int(now) + + # Fetch diskstats from the OS + for perf_entry in freadlines('/proc/diskstats'): + + field = perf_entry.split() + dev_name = field[2] + + device = None + if self.dev_lookup.get(dev_name, None) == 'osd': + device = self.osd[dev_name] + elif self.dev_lookup.get(dev_name, None) == 'jrnl': + device = self.jrnl[dev_name] + + if device: + new_stats = field[3:] + + if device.perf._current: + device.perf._previous = device.perf._current + device.perf._current = new_stats + else: + device.perf._current = new_stats + + device.perf.compute(interval) + + end = time.time() + self.logger.debug("OS disk stats calculated in " + "{:.4f}s".format(end-now)) + + # fetch stats from each osd daemon + osd_stats_start = time.time() + for osd_id in self.osd_id_list: + + osd_type = self.osd[osd_id]._osd_type + + if osd_type in OSDs.supported_object_stores: + + osd_stats = self._fetch_osd_stats(osd_id, osd_type) + if osd_stats: + osd_device = self.osd[osd_id] + osd_device.update(osd_stats) + else: + self.logger.warning("OSD stats for osd.{} not " + "available".format(osd_id)) + + else: + self.logger.warning("Unknown OSD type encountered for " + "osd.{}".format(osd_id)) + + osd_stats_end = time.time() + self.logger.debug("OSD perf dump stats collected for {} OSDs " + "in {:.3f}s".format(len(self.osd_id_list), + (osd_stats_end - osd_stats_start))) + + @staticmethod + def _dump_devs(device_dict): + + dumped = {} + + for dev_name in sorted(device_dict): + device = device_dict[dev_name] 
+ dumped[dev_name] = todict(device) + + return dumped + + def dump(self): + """ + dump the osd object(s) to a dict. The object *must* not have references + to other objects - if this rule is broken cephmetrics caller will fail + when parsing the dict + + :return: (dict) dictionary representation of this OSDs on this host + """ + + osds = OSDs._dump_devs(self.osd) + osds['ceph_version'] = self.version + osds['num_osds'] = self.osd_count + + return { + "osd": osds, + "jrnl": OSDs._dump_devs(self.jrnl) + } + + def get_stats(self): + + start = time.time() + + self._dev_to_osd() + self._stats_lookup() + + end = time.time() + + self.logger.info("osd get_stats call " + ": {:.3f}s".format((end - start))) + + return self.dump() diff --git a/collectors/rgw.py b/collectors/rgw.py new file mode 100644 index 0000000..687f17f --- /dev/null +++ b/collectors/rgw.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python + +import time +import glob + +from collectors.base import BaseCollector +from collectors.common import get_hostname, merge_dicts + + +class RGW(BaseCollector): + + simple_metrics = { + "req": ("requests", "derive"), + "failed_req": ("requests_failed", "derive"), + "get": ("gets", "derive"), + "get_b": ("get_bytes", "derive"), + "put": ("puts", "derive"), + "put_b": ("put_bytes", "derive"), + "qlen": ("qlen", "derive"), + "qactive": ("requests_active", "derive") + } + + int_latencies = [ + "get_initial_lat", + "put_initial_lat" + ] + + latencies = { + "get_initial_lat_sum": ("get_initial_lat_sum", "derive"), + "get_initial_lat_avgcount": ("get_initial_lat_avgcount", "derive"), + "put_initial_lat_sum": ("put_initial_lat_sum", "derive"), + "put_initial_lat_avgcount": ("put_initial_lat_avgcount", "derive") + } + + all_metrics = merge_dicts(simple_metrics, latencies) + + def __init__(self, *args, **kwargs): + BaseCollector.__init__(self, *args, **kwargs) + + self.host_name = get_hostname() + + def _get_rgw_data(self): + + rgw_sockets = glob.glob('/var/run/ceph/{}-client.rgw.' 
@staticmethod
def stats_filter(stats):
    """
    Reduce a raw rgw perf dump to the metrics this collector reports.

    :param stats: (dict) raw counters read from the rgw admin socket
    :return: (dict) the simple metrics, plus one '<latency>_<field>' entry
             for every field of each latency counter
    """
    # pick out the simple metrics
    selected = {}
    for metric_name in RGW.simple_metrics:
        selected[metric_name] = stats[metric_name]

    # flatten each latency counter into individual entries
    for latency_name in RGW.int_latencies:
        latency = stats[latency_name]
        for field_name in latency:
            flat_name = "{}_{}".format(latency_name, field_name)
            selected[flat_name] = latency.get(field_name)

    return selected
def fread(file_name=None):
    """
    Read a file in one go.

    :param file_name: (str) path of the file to read
    :return: (str) the complete contents of the file
    """
    with open(file_name) as handle:
        return handle.read()
def get_config(file_name):
    """
    read a given file, and attempt to load as yaml
    :param file_name: (str) path of the yaml config file
    :return (Config) config object instance, or None if the file is
            missing, is not valid yaml or does not hold a mapping
    """
    if not os.path.exists(file_name):
        return None

    config_data = fread(file_name)
    try:
        # safe_load only builds plain data types. yaml.load without a
        # Loader can construct arbitrary objects and raises TypeError on
        # PyYAML >= 6, which made every config look invalid.
        yaml_config = yaml.safe_load(config_data)
    except yaml.YAMLError:
        return None

    if not isinstance(yaml_config, dict):
        # empty file, or a top-level list/scalar - nothing usable
        return None

    cfg = Config()
    cfg.grafana_host = yaml_config.get('_grafana_host', 'localhost')
    cfg.dashboards = yaml_config.get('_dashboards', [])
    cfg.auth = yaml_config.get('_credentials', {"user": 'admin',
                                                "password": "admin"})
    cfg.grafana_credentials = (cfg.auth.get('user'),
                               cfg.auth.get('password'))
    cfg.grafana_port = yaml_config.get('_grafana_port', 3000)
    cfg.home_dashboard = yaml_config.get('_home_dashboard',
                                         'ceph-at-a-glance')
    cfg.alert_dashboard = yaml_config.get('_alert_dashboard',
                                          'alert-status')
    cfg.domain = yaml_config.get('domain', '')
    cfg.yaml = yaml_config
    return cfg
def load_dashboard(dashboard_dir, dashboard_name):
    """
    Load a sample dashboard from disk and prepare it for upload.

    :param dashboard_dir: (str) directory holding the sample json files
    :param dashboard_name: (str) dashboard name, without the .json suffix
    :return: (dict) dashboard json ready to be posted, or an empty dict
             when no sample file exists
    :raises DashBoardException: the sample file does not contain valid json
    """

    sample_dashboard = os.path.join(dashboard_dir,
                                    "{}.json".format(dashboard_name))
    if not os.path.exists(sample_dashboard):
        logger.warning("- sample not available for {}, "
                       "skipping".format(dashboard_name))
        return {}

    # load it in
    dashboard_data = fread(sample_dashboard)

    # if domain has not been given, we need to remove it from the queries
    if not config.domain:
        dashboard_data = dashboard_data.replace('$domain.', '')

    try:
        dashjson = json.loads(dashboard_data)
    except ValueError:
        # json decode errors are ValueError subclasses; anything else
        # (e.g. KeyboardInterrupt) must not be masked as bad json
        raise DashBoardException("Invalid json in {} "
                                 "dashboard".format(dashboard_name))

    logger.debug("- {} sample loaded from {}".format(dashboard_name,
                                                     dashboard_dir))

    # samples exported without a 'meta' section are still usable
    dashjson.pop('meta', None)
    dashjson['overwrite'] = True

    # 'id' must be null for this to be a create, if it is anything
    # else grafana will attempt an update, which will fail
    # with a 404
    dashjson['dashboard']['id'] = None
    return dashjson
resp.status_code, {} + + elif resp.status_code == 200: + logger.debug("- fetch of {} from Grafana " + "successful".format(dashboard_name)) + return resp.status_code, resp.json() + else: + raise DashBoardException("Unknown problem fetching dashboard") + + +def put_dashboard(dashjson): + upload_str = json.dumps(dashjson) + resp = post("http://{}:{}/api/dashboards/" + "db".format(config.grafana_host, + config.grafana_port), + headers=HEADERS, + auth=config.grafana_credentials, + data=upload_str) + + return resp.status_code + + +def star_dashboard(dashboard_id): + + resp = post('http://{}:{}/api/user/stars/' + 'dashboard/{}'.format(config.grafana_host, + config.grafana_port, + dashboard_id), + headers=HEADERS, + auth=config.grafana_credentials) + + if resp.status_code == 200: + logger.debug("- dashboard starred successfully") + else: + logger.warning("- starring dashboard with id {} " + "failed : {}".format(dashboard_id, + resp.status_code)) + return resp.status_code + +def set_home_dashboard(home_dashboard): + # Ideally we should just check the json returned from an org query...but + # 4.3 of grafana doesn't return the home dashboard or theme settings! 
+ + logger.debug("- checking '{}' is starred".format(home_dashboard)) + + http_rc, dashjson = get_dashboard(home_dashboard) + if http_rc == 200 and dashjson: + + dash_id = dashjson.get('dashboard').get('id') + is_starred = dashjson.get('meta').get('isStarred') + if not is_starred: + # star it + http_rc = star_dashboard(dash_id) + is_starred = True if http_rc == 200 else False + + if is_starred: + # update the org's home dashboard + resp = put('http://{}:{}/api/org/' + 'preferences'.format(config.grafana_host, + config.grafana_port), + headers=HEADERS, + auth=config.grafana_credentials, + data=json.dumps({"name": "Main Org.", + "theme": "light", + "homeDashboardId": dash_id})) + + if resp.status_code == 200: + logger.info("- setting home dashboard complete") + else: + logger.error("- setting home dashboard failed") + + return resp.status_code + + else: + logger.error("- unable to access dashboard {}".format(home_dashboard)) + + return http_rc + + +def setup_logging(): + + logger = logging.getLogger('dashUpdater') + logger.setLevel(logging.DEBUG) + + stream_handler = logging.StreamHandler(stream=sys.stdout) + if opts.debug: + stream_handler.setLevel(logging.DEBUG) + else: + stream_handler.setLevel(logging.INFO) + + logger.addHandler(stream_handler) + + return logger + + +def get_notification_id(channel_name): + """ + Check whether the given notification channel has been defined to Grafana + :param (str) notification channel name + :return: (int) id of the channel, or 0 for doesn't exist + """ + + resp = get("http://{}:{}/api/" + "alert-notifications".format(config.grafana_host, + config.grafana_port), + auth=config.grafana_credentials) + + if resp.status_code == 200: + notifications = resp.json() # list if dicts returned by Grafana + + # convert the list into a dict for lookup purposes + channels = {channel.get('name'): channel.get('id') + for channel in notifications} + if channel_name in channels: + return channels[channel_name] + else: + return 0 + else: + raise 
DashBoardException("Unable to get nofification channels from" + " Grafana") + + +def define_notification(channel_name): + """ + Add a given "seed" notification channel to Grafana using http post + :param channel_name: (str) channel name + :return: (int) http response code from post operation + (dict) response json object + """ + + seed_channel = json.dumps({"name": channel_name, + "type": "email", + "isDefault": False + }) + + resp = post('http://{}:{}/api/' + 'alert-notifications'.format(config.grafana_host, + config.grafana_port), + headers=HEADERS, + auth=config.grafana_credentials, + data=seed_channel) + + return resp.status_code, resp.json() + + +def main(): + + rc = 0 + + if port_open(config.grafana_port, config.grafana_host): + logger.debug("Connection to Grafana is ok") + else: + logger.error("Unable to contact Grafana - does the config file " + "specify a valid host/ip address for Grafana?") + return 16 + + if config.dashboards: + vars_to_update = {k: config.yaml[k] for k in config.yaml + if not k.startswith('_')} + if 'domain' not in vars_to_update: + vars_to_update['domain'] = config.domain + + else: + logger.error("Config file doesn't contain dashboards! 
Unable " + "to continue") + return 16 + + dashboards_updated = 0 + logger.debug("Templates to update: {}".format(vars_to_update)) + + for dashname in config.dashboards: + logger.info("\nProcessing dashboard {}".format(dashname)) + + http_rc, dashjson = get_dashboard(dashname) + if (dashname == config.alert_dashboard and http_rc == 200 and not + opts.update_alerts): + logger.info("- existing alert dashboard found, update bypassed") + continue + + if opts.mode == 'update': + + if http_rc == 200: + # the dashboard is already loaded, so we'll use the existing + # definition + logger.debug("- existing dashboard will be updated") + else: + # get of dashboard failed, so just load it + dashjson = load_dashboard(opts.dashboard_dir, dashname) + + if dashjson: + logger.info("- dashboard loaded from sample") + else: + logger.warning("- sample not available, skipping") + rc = max(rc, 4) + continue + + logger.info("- dashboard retrieved") + + elif opts.mode == 'refresh': + + dashjson = load_dashboard(opts.dashboard_dir, dashname) + + if not dashjson: + logger.warning("- sample not available, skipping") + rc = max(rc, 4) + continue + + if dashname == config.alert_dashboard: + # if processing is here, this is 1st run so the alert_dashboard + # is new to grafana + channel_id = get_notification_id("cephmetrics") + if channel_id: + logger.info("- notification channel already in place") + else: + http_rc, resp_json = define_notification("cephmetrics") + if http_rc == 200: + channel_id = resp_json['id'] + logger.info("- notification channel added :" + "{}".format(channel_id)) + else: + raise DashBoardException("Problem adding notification " + "channel ({})".format(http_rc)) + + dash_str = json.dumps(dashjson) + dash_str = dash_str.replace('"notifications": []', + '"notifications": [{{ "id":' + ' {0} }}]'.format(channel_id)) + if config.domain: + logger.debug("- queries updated, replacing $domain with " + "'{}'".format(config.domain)) + dash_str = dash_str.replace('.$domain', + 
".{}".format(config.domain)) + else: + logger.debug("- queries updated, replacing $domain with NULL") + dash_str = dash_str.replace('.$domain', + '') + + dashjson = json.loads(dash_str) + + else: + # Normal dashboard processing + templating = dashjson['dashboard'].get('templating') + if templating: + dashjson = update_dashboard(dashjson, vars_to_update) + else: + logger.info('- templating not defined in {}, ' + 'skipping'.format(dashname)) + rc = max(rc, 4) + + http_rc = put_dashboard(dashjson) + + if http_rc == 200: + logger.info("- dashboard update successful") + dashboards_updated += 1 + + if dashname == config.home_dashboard: + # ensure the home dashboard is defined + http_rc = set_home_dashboard(dashname) + + if http_rc != 200: + logger.warning("- Unable to set the home dashboard") + rc = max(rc, 12) + + else: + logger.error("- dashboard {} update failed ({})".format(dashname, + http_rc)) + rc = max(rc, 8) + + return rc + + +if __name__ == '__main__': + + opts = get_options() + + config = get_config(opts.config_file) + + if config: + + logger = setup_logging() + + rc = main() + + sys.exit(rc) + + else: + + print("Invalid config file detected, unable to start") + sys.exit(16) diff --git a/dashboard.yml b/dashboard.yml new file mode 100644 index 0000000..055f935 --- /dev/null +++ b/dashboard.yml @@ -0,0 +1,47 @@ +--- +########################################################## +# Change these settings to reflect your ceph environment # +########################################################## +osd_servers: + - obj-osd-1 + - obj-osd-2 + - obj-osd-3 + +rgw_servers: + - obj-rgw-1 + +#iscsi_gateways: +# - rh7-gw1 +# - rh7-gw2 + +domain: storage.lab + +########################################################################### +# This section defines the internal variables (denoted by the '_' prefix) # +# that govern how dashUpdater.py runs. 
Normally you'd leave these alone # +########################################################################### +_dashboards: + - alert-status + - ceph-at-a-glance + - ceph-backend-storage + - ceph-cluster + - ceph-health + - ceph-osd-information + - ceph-pools + - ceph-rgw-workload + - disk-busy-by-server + - iops-by-server + - iscsi-overview + - latency-by-server + - network-usage-by-node + - osd-node-detail + +_home_dashboard: ceph-at-a-glance +_alert_dashboard: alert-status + +_credentials: + user: admin@localhost + password: admin + +_grafana_port: 3000 + diff --git a/dashboards/archive/Ceph_dashboard-2017-05-19.json b/dashboards/archive/Ceph_dashboard-2017-05-19.json new file mode 100644 index 0000000..f8099f0 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-19.json @@ -0,0 +1,788 @@ +{ + "__inputs": [ + { + "name": "DS_INFLUX", + "label": "influx", + "description": "", + "type": "datasource", + "pluginId": "influxdb", + "pluginName": "InfluxDB" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "influxdb", + "name": "InfluxDB", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 157, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + 
"links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_mon", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": 
false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Recovery Workload", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + 
"valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_INFLUX}", + "format": "bytes", + "gauge": { + "maxValue": 50465865728, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes_used", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "35949672960,42949672960", + "title": "Capacity Used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "reads", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "writes", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.write_op_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client IOPS for all Pools", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Reads", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_bytes_sec", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Writes", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_bytes_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.write_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Throughput - All Pools", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Pool Overview", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard", + "version": 2 +} \ No newline at end of file diff --git 
a/dashboards/archive/Ceph_dashboard-2017-05-24.json b/dashboards/archive/Ceph_dashboard-2017-05-24.json new file mode 100644 index 0000000..7ea21a7 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-24.json @@ -0,0 +1,2413 @@ +{ + "__inputs": [ + { + "name": "DS_INFLUX", + "label": "influx", + "description": "", + "type": "datasource", + "pluginId": "influxdb", + "pluginName": "InfluxDB" + }, + { + "name": "VAR_MONITOR", + "type": "constant", + "label": "monitor", + "value": "obj-mon-1.storage.lab", + "description": "" + }, + { + "name": "VAR_CLUSTER_NAME", + "type": "constant", + "label": "cluster_name", + "value": "ceph", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "influxdb", + "name": "InfluxDB", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "vonage-status-panel", + "name": "Status Panel", + "version": "1.0.4" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 226, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to 
text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_mon", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "clusterName": "OSDs", + "displayName": "OSDs", + "flipCard": false, + "flipTime": 5, + "id": 20, + "isGrayOnNoData": true, + "links": [], + "minSpan": 1, + "namePrefix": "", + "span": 1, + "targets": [ + { + "aggregation": "Last", + "alias": "Total", + "displayType": "Annotation", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "valueDisplayRegex": "/.*/", + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "Up", + "display": true, + "displayType": "Regular", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_up", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + 
"tags": [], + "valueDisplayRegex": "/.*/", + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "In", + "displayType": "Regular", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_up", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "valueDisplayRegex": "/.*/", + "valueHandler": "Text Only" + } + ], + "title": "", + "type": "vonage-status-panel" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(193, 106, 31)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } 
+ ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Recovery Workload", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": false, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.write_op_per_sec\" WHERE 
$timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "decimals": null, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": false, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.write_op_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Client Bandwidth", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 16, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": "value", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [ + "1", + "50" + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "PG's peering", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg_peering", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg_peering\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Object Degraded", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_degraded", + "policy": "default", + "query": "SELECT \"value\" FROM 
\"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_degraded\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Objects Unfound", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_unfound", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_unfound\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Objects Recovering", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.recovering_objects_per_sec", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "title": "Health Indicators", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_INFLUX}", + "format": "bytes", + "gauge": { + "maxValue": 50465865728, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": 
[ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes_used", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "35949672960,42949672960", + "title": "Capacity Used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 256, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 7, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes_used", + "policy": "default", + "refId": "B", + 
"resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + 
"valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 18, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "OSDs", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "OSD's Active (in)", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_in", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "title": "OSD State", + "transform": "timeseries_aggregations", + "type": "table" + 
} + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Capacity", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.pools.$pool_name.write_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload IOPS (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + 
"yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.pools.$pool_name.write_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload Throughput (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + 
"show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.pools.$pool_name.recovering_bytes_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Overhead (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Workload by Pool", + "titleSize": "h6" + }, + { + "collapse": false, + 
"height": 223, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.put_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + 
"name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + 
"value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests/sec", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.qlen", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { 
+ "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Queue", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host (S3/Swift)", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_INFLUX}", + "filterNull": false, + "fontSize": "100%", + "id": 10, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Degraded", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_degraded", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + 
[ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Misplaced", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_misplaced", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "UnFound", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_unfound", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_INFLUX}", + "filterNull": false, + "fontSize": "100%", + "id": 13, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + 
"type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Active", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg_active", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Active/Clean", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg_active_clean", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Peering", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg_peering", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + 
"repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RADOS", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + {} + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Panel Title", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Breakdown", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + }, + "hide": 2, + "label": null, + "name": "monitor", + "options": [ + { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + } + ], + "query": "${VAR_MONITOR}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + }, + "hide": 2, + "label": null, + "name": "cluster_name", + "options": [ + { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + } + ], + "query": "${VAR_CLUSTER_NAME}", + "type": "constant" + }, + { + "allValue": null, + 
"current": {}, + "datasource": "${DS_INFLUX}", + "hide": 0, + "includeAll": false, + "label": "Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "show series ", + "refresh": 1, + "regex": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools\\.(\\w+)/", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_INFLUX}", + "hide": 0, + "includeAll": false, + "label": "RGW Host", + "multi": false, + "name": "rgw_name", + "options": [], + "query": "show series", + "refresh": 1, + "regex": "/collectd\\.(.*)\\.cephmetrics.*\\.rgw/", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard", + "version": 57 +} \ No newline at end of file diff --git a/dashboards/archive/Ceph_dashboard-2017-05-25.json b/dashboards/archive/Ceph_dashboard-2017-05-25.json new file mode 100644 index 0000000..915b0e1 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-25.json @@ -0,0 +1,2689 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_MONITOR", + "type": "constant", + "label": "monitor", + "value": "obj-mon-1.storage.lab", + "description": "" + }, + { + "name": "VAR_CLUSTER_NAME", + "type": "constant", + "label": "cluster_name", + "value": "ceph", + "description": "" + }, + { + "name": "VAR_RGW_NAME", + "type": "constant", + "label": "RGW Host", + "value": "obj-rgw-1", + "description": "" + }, 
+ { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 243, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.health" + } + ], + "thresholds": "1", + "title": "Health", + "type": "singlestat", + "valueFontSize": "100%", + 
"valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mon", + "textEditor": true + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" 
+ ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd" + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + 
"lineColor": "rgb(193, 106, 31)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.recovering_bytes_per_sec", + "textEditor": true + } + ], + "thresholds": "", + "title": "Recovery Workload", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A, #B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec, collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec).select metric" + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 
1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.pools._all_.write_bytes_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A,#B).select metric", + "targetFull": 
"sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec,collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_bytes_sec).select metric" + } + ], + "thresholds": "", + "title": "Client Bandwidth", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 50465865728, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used" + } + ], + "thresholds": "35949672960,42949672960", + "title": "Capacity Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + 
], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 256, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 7, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 'Raw')" + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 'Used')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pool" + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 18, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", 
+ "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd, 'OSDs')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_in, 'OSDs In')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_up, 'OSDs Up')" + } + ], + 
"title": "OSD State", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Capacity", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec, 'Reads')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": 
"alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec, 'Writes')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload IOPS (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec, 'Read')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM 
/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec, 'Write')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload Throughput (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec": "#C15C17" + }, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + 
"params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Overhead (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Workload by Pool", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 302, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": 
"field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum" + }, + { + "refId": "C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum, collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount).select metric", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": 
"influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum" + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum,collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount).select metric", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "span": 5, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get, 'GET')" + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put, 'PUT')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests/sec", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.qlen", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.qlen" + } + ], + "thresholds": "", + "title": "Queue", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host (S3/Swift)", + "titleSize": "h6" + }, + { + 
"collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 10, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object, 'Objects')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_degraded, 'Objects degraded')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" 
+ ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_misplaced, 'Objects misplaced')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_unfound, 'Objects unfound')" + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 13, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + 
"params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg, 'PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active, 'Active PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean, 'Active+clean PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + 
"value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_peering, 'PGs peering')" + } + ], + "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RADOS", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + {} + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Panel Title", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Breakdown", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "height": "245px", + "id": 16, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 12, + 
"styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": "value", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [ + "1", + "50" + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "PG's peering", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_peering", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_peering\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_degraded, 'Objects degraded')", + "textEditor": false + }, + { + "alias": "Object Degraded", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object_degraded", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object_degraded\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_peering, 'PGs peering')" + }, + { + "alias": "Objects Unfound", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object_unfound", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object_unfound\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": 
"time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_unfound, 'Objects unfound')" + }, + { + "alias": "Objects Recovering", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_objects_per_sec", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.recovering_objects_per_sec, 'Objects recovering')" + } + ], + "title": "Health Indicators", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + }, + "hide": 2, + "label": null, + "name": "monitor", + "options": [ + { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + } + ], + "query": "${VAR_MONITOR}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + }, + "hide": 2, + "label": null, + "name": "cluster_name", + "options": [ + { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + } + ], + "query": "${VAR_CLUSTER_NAME}", + "type": "constant" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 2, + "includeAll": false, + "label": "Pool Name", + "multi": false, + "name": "pool_name_old", + "options": [], + "query": "show series ", + "refresh": 1, + "regex": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools\\.(\\w+)/", + "sort": 0, + "tagValuesQuery": 
"", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 2, + "includeAll": false, + "label": "RGW Host", + "multi": false, + "name": "rgw_name_old", + "options": [], + "query": "show series", + "refresh": 1, + "regex": "/collectd\\.(.*)\\.cephmetrics.*\\.rgw/", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "value": "${VAR_RGW_NAME}", + "text": "${VAR_RGW_NAME}" + }, + "hide": 0, + "label": "RGW Host", + "name": "rgw_name", + "options": [ + { + "value": "${VAR_RGW_NAME}", + "text": "${VAR_RGW_NAME}" + } + ], + "query": "${VAR_RGW_NAME}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard (Graphite)", + "version": 1 +} \ No newline at end of file diff --git a/dashboards/archive/Ceph_dashboard-2017-05-26.json b/dashboards/archive/Ceph_dashboard-2017-05-26.json new file mode 100644 index 
0000000..e6cb476 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-26.json @@ -0,0 +1,2792 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_MONITOR", + "type": "constant", + "label": "monitor", + "value": "obj-mon-1.storage.lab", + "description": "" + }, + { + "name": "VAR_CLUSTER_NAME", + "type": "constant", + "label": "cluster_name", + "value": "ceph", + "description": "" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 243, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, 
+ "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.health" + } + ], + "thresholds": "1", + "title": "Health", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "Warning", + "value": "4" + }, + { + "op": "=", + "text": "Error", + "value": "8" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon\" WHERE $timeFilter", + 
"rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mon", + "textEditor": true + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd" + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 
100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 33, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "sumSeries(offset(scale(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0),1))", + "textEditor": true + } + ], + "thresholds": "", + "title": "RGW Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 34, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 
193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mds_in" + } + ], + "thresholds": "", + "title": "MDS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(193, 106, 31)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.recovering_bytes_per_sec", + "textEditor": true + } + ], + "thresholds": "", + "title": "Recovery", + 
"type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) 
fill(null)", + "rawQuery": false, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A, #B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec, collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec).select metric" + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM 
\"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.pools._all_.write_bytes_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A,#B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec,collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_bytes_sec).select metric" + } + ], + "thresholds": "", + "title": "Client Load", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 50465865728, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to 
text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used" + } + ], + "thresholds": "35949672960,42949672960", + "title": "Capacity Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 256, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 7, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + 
"select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 'Raw')" + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 'Used')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 
118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pool" + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 18, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd, 'OSDs')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_in, 'OSDs In')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_up, 'OSDs Up')" + } + ], + "title": "OSD State", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Capacity", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": 
"/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec, 'Reads')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec, 'Writes')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload IOPS (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": 
[], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec, 'Read')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec, 'Write')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload Throughput (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
true + } + ] + }, + { + "aliasColors": { + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec": "#C15C17" + }, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Overhead (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Workload Detail by Pool", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 302, + 
"panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum" + }, + { + "refId": 
"C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum, collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount).select metric", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum" + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": 
"divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum,collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount).select metric", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": 
"alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get, 'GET')" + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put, 'PUT')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests/sec", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": 
null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.qlen", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.qlen" + } + ], + "thresholds": "", + "title": "Queue", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 32, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(scale(averageSeries(collectd.$rgw_name.$domain.cpu.*.cpu.idle),0.01), 1)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, 
+ "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 31, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.rx), 'rx')" + }, + { + "refId": "B", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.tx), 'tx')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host (S3/Swift)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 10, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + 
"span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object, 'Objects')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_degraded, 'Objects degraded')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": 
"mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_misplaced, 'Objects misplaced')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_unfound, 'Objects unfound')" + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 13, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + 
"target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg, 'PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active, 'Active PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean, 'Active+clean PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_peering, 'PGs peering')" + } + ], + "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": 
null, + "repeatRowId": null, + "showTitle": true, + "title": "RADOS", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + {} + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Panel Title", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Breakdown", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + }, + "hide": 2, + "label": null, + "name": "monitor", + "options": [ + { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + } + ], + "query": "${VAR_MONITOR}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + }, + "hide": 2, + "label": null, + "name": "cluster_name", + "options": [ + { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + } + ], + "query": "${VAR_CLUSTER_NAME}", + "type": "constant" + }, + { + "allValue": null, + "current": {}, + 
"datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "Workload Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "RGW Host", + "multi": true, + "name": "rgw_name", + "options": [], + "query": "collectd.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard (Graphite)", + "version": 17 +} \ No newline at end of file diff --git a/dashboards/archive/Ceph_dashboard-2017-05-29.json b/dashboards/archive/Ceph_dashboard-2017-05-29.json new file mode 100644 index 0000000..9765ed9 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-29.json @@ -0,0 +1,3669 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_MONITOR", + "type": "constant", + "label": "monitor", + "value": "obj-mon-1.storage.lab", + "description": "" + }, + { + "name": "VAR_CLUSTER_NAME", + "type": 
"constant", + "label": "cluster_name", + "value": "ceph", + "description": "" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 243, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.health" + } + ], + "thresholds": 
"1", + "title": "Health", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "Warning", + "value": "4" + }, + { + "op": "=", + "text": "Error", + "value": "8" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mon", + "textEditor": true + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + 
"valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd" + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 35, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + 
"prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "sumSeries(offset(scale(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.vdb.osd_id,0),1))", + "textEditor": true + } + ], + "thresholds": "", + "title": "OSD Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 33, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "sumSeries(offset(scale(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0),1))", + "textEditor": true + } + ], + "thresholds": "", + "title": "RGW Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + 
"colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 34, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mds_in" + } + ], + "thresholds": "", + "title": "MDS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + 
"from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(193, 106, 31)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.recovering_bytes_per_sec", + "textEditor": true + } + ], + "thresholds": "", + "title": "Recovery", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + 
"show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A, #B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec, collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec).select metric" + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + 
"thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.pools._all_.write_bytes_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A,#B).select metric", + "targetFull": 
"sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec,collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_bytes_sec).select metric" + } + ], + "thresholds": "", + "title": "Client Load", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 37, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 50, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util),$percentile)", + "textEditor": true + } + ], + "thresholds": "70,90", + "title": "Disk Busy", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, 
+ "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 36, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 50, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),$percentile)", + "textEditor": true + } + ], + "thresholds": "20,60", + "title": "Latency(ms)", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 38, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": 
"rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_avail" + } + ], + "thresholds": "", + "title": "Free Space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 256, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 7, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 'Raw')" + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 'Used')" + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pool" + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": 
"null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 18, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd, 'OSDs')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_in, 'OSDs In')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + 
"query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_up, 'OSDs Up')" + } + ], + "title": "OSD State", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Capacity", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec, 'Reads')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": 
"default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec, 'Writes')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload IOPS (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": 
"alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec, 'Read')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec, 'Write')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload Throughput (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec": "#C15C17" + }, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": 
"/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Overhead (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Workload Detail by Pool", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 302, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum" + }, + { + "refId": "C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum, collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount).select metric", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE 
$timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum" + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum,collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount).select metric", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 
25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get, 'GET')" + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put, 'PUT')" + } + ], + "thresholds": [], + "timeFrom": 
null, + "timeShift": null, + "title": "Requests/sec", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.qlen", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.qlen" 
+ } + ], + "thresholds": "", + "title": "Queue", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 32, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(scale(averageSeries(collectd.$rgw_name.$domain.cpu.percent.idle),0.01), 1)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 31, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.rx), 'rx')" + }, + { + 
"refId": "B", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.tx), 'tx')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host (S3/Swift)", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "300", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_mbps), \"Read Throughput\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_mbps), \"Write Throughput\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend Disk Load (MB/s) - all OSD's", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 40, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_await),$percentile), \"Read Latency\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_await),$percentile), \"Write Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend Disk Latency (ms) - all OSD's - at $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 43, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util),$percentile), \"disk busy %\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Overall Disk Busy at $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),$percentile),\"IOPS/spindle\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS per Disk @ $percentile%ile - all OSDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + 
"min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"IOPS\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend IOPS - all OSD's", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(scale(collectd.*.$domain.cpu.percent.idle,0.01),1)", + "textEditor": true + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeShift": null, + "title": "CPU Busy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend OSD Load Summary", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 39, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.osd_id,1,-2)", + "textEditor": true + } + ], + "title": "Host/Disk to OSD ID Mapping", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 41, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + 
"rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await,1,-3)", + "textEditor": true + } + ], + "title": "Disk Latency Breakdown (ms)", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 42, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.mbps,1,-3)", + "textEditor": true + } + ], + "title": "Disk Bandwidth (MB/s)", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 45, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + 
"target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util,1,-3)", + "textEditor": true + } + ], + "title": "Disk %Util", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 48, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops,1,-3)", + "textEditor": true + } + ], + "title": "IOPS/Disk", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Detail", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 10, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "dsType": "influxdb", + 
"groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object, 'Objects')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_degraded, 'Objects degraded')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_misplaced, 'Objects misplaced')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_unfound, 'Objects unfound')" + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 13, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg, 'PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + 
"policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active, 'Active PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean, 'Active+clean PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_peering, 'PGs peering')" + } + ], + "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RADOS", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + }, + "hide": 2, + "label": null, + "name": "monitor", + "options": [ + { + "value": "${VAR_MONITOR}", 
+ "text": "${VAR_MONITOR}" + } + ], + "query": "${VAR_MONITOR}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + }, + "hide": 2, + "label": null, + "name": "cluster_name", + "options": [ + { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + } + ], + "query": "${VAR_CLUSTER_NAME}", + "type": "constant" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "Workload Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "RGW Host", + "multi": true, + "name": "rgw_name", + "options": [], + "query": "collectd.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,90,95,98", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + 
"timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard (Graphite)", + "version": 37 +} \ No newline at end of file diff --git a/dashboards/archive/Ceph_dashboard-2017-05-31.json b/dashboards/archive/Ceph_dashboard-2017-05-31.json new file mode 100644 index 0000000..4fbc5e6 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-31.json @@ -0,0 +1,3757 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_MONITOR", + "type": "constant", + "label": "monitor", + "value": "obj-mon-1.storage.lab", + "description": "" + }, + { + "name": "VAR_CLUSTER_NAME", + "type": "constant", + "label": "cluster_name", + "value": "ceph", + "description": "" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.3.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 243, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + 
"rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.health" + } + ], + "thresholds": "1", + "title": "Health", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "Warning", + "value": "4" + }, + { + "op": "=", + "text": "Error", + "value": "8" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ 
+ { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mon", + "textEditor": true + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + 
"refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd" + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 35, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(offset(scale(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.vdb.osd_id,0),1))", + "textEditor": true + } + ], + "thresholds": "", + "title": "OSD Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true 
+ }, + "id": 33, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(offset(scale(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0),1))", + "textEditor": true + } + ], + "thresholds": "", + "title": "RGW Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 34, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + 
"refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mds_in" + } + ], + "thresholds": "", + "title": "MDS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(193, 106, 31)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.recovering_bytes_per_sec", + "textEditor": true + } + ], + "thresholds": "", + "title": "Recovery", + "type": "singlestat", + 
"valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + 
"rawQuery": false, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A, #B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec, collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec).select metric" + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM 
\"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.pools._all_.write_bytes_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A,#B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec,collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_bytes_sec).select metric" + } + ], + "thresholds": "", + "title": "Client Load", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 37, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", 
+ "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 50, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util),$percentile)", + "textEditor": true + } + ], + "thresholds": "70,90", + "title": "Disk Busy", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 36, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 50, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),$percentile)", + 
"textEditor": true + } + ], + "thresholds": "20,60", + "title": "Latency(ms)", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 38, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_avail" + } + ], + "thresholds": "", + "title": "Free Space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 256, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + 
"values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 7, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 'Raw')" + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 'Used')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + 
"thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pool" + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 18, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd, 'OSDs')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_in, 'OSDs In')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_up, 'OSDs Up')" + } + ], + "title": "OSD State", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Capacity", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": 
"${DS_LOCAL}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec, 'Reads')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec, 'Writes')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload IOPS (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": 
[ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec, 'Read')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec, 'Write')" + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeShift": null, + "title": "Client Workload Throughput (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec": "#C15C17" + }, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Overhead (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": 
"time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Workload Detail by Pool", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 302, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum" + }, + { + "refId": "C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum, collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount).select metric", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE 
$timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum" + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum,collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount).select metric", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + 
"measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get, 'GET')" + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put, 'PUT')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests/sec", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + 
"rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.qlen", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.qlen" + } + ], + "thresholds": "", + "title": "Queue", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 32, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(scale(averageSeries(collectd.$rgw_name.$domain.cpu.percent.idle),0.01), 1)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 31, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.rx), 'rx')" + }, + { + "refId": "B", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.tx), 'tx')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + 
"logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host (S3/Swift)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "300", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_mbps), \"Read Throughput\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_mbps), \"Write Throughput\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend Disk Load (MB/s) - all OSD's", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 40, + "legend": { 
+ "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/latency-by-server", + "dashboard": "Latency by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Latency by Server", + "type": "dashboard" + } + ], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_await),$percentile), \"Read Latency\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_await),$percentile), \"Write Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend Disk Latency (ms) - all OSD's - at $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 43, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/disk-busy-by-server", + "dashboard": "Disk Busy by Server", + 
"includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Disk Busy by Server", + "type": "dashboard" + } + ], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util),$percentile), \"disk busy %\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Overall Disk Busy at $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),$percentile),\"IOPS/spindle\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS per Disk @ $percentile%ile - all OSDs", 
+ "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/iops-by-server", + "dashboard": "IOPS by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "IOPS by Server", + "type": "dashboard" + } + ], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"IOPS\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend IOPS - all OSD's", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"${DS_LOCAL}", + "fill": 1, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(scale(collectd.*.$domain.cpu.percent.idle,0.01),1)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend OSD Load Summary", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 39, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": 
"aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.osd_id,1,-2)", + "textEditor": true + } + ], + "title": "Host/Disk to OSD ID Mapping", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 41, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await,1,-3)", + "textEditor": true + } + ], + "title": "Disk Latency Breakdown (ms)", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 42, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.mbps,1,-3)", + "textEditor": true + } + ], + "title": "Disk Bandwidth (MB/s)", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + 
"columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 45, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util,1,-3)", + "textEditor": true + } + ], + "title": "Disk %Util", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 48, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops,1,-3)", + "textEditor": true + } + ], + "title": "IOPS/Disk", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Detail", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + 
], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 10, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object, 'Objects')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_degraded, 'Objects degraded')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_misplaced, 'Objects misplaced')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_unfound, 'Objects unfound')" + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 13, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg, 'PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active, 'Active PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean, 'Active+clean PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": 
"mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_peering, 'PGs peering')" + } + ], + "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RADOS", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + }, + "hide": 2, + "label": null, + "name": "monitor", + "options": [ + { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + } + ], + "query": "${VAR_MONITOR}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + }, + "hide": 2, + "label": null, + "name": "cluster_name", + "options": [ + { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + } + ], + "query": "${VAR_CLUSTER_NAME}", + "type": "constant" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "Workload Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "RGW Host", + "multi": false, + "name": "rgw_name", + "options": [], + "query": "collectd.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": 
"${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,90,95,98", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard (Graphite)", + "version": 43 +} \ No newline at end of file diff --git a/dashboards/archive/Disk Busy by Server-2017-05-31.json b/dashboards/archive/Disk Busy by Server-2017-05-31.json new file mode 100644 index 0000000..fdc6468 --- /dev/null +++ b/dashboards/archive/Disk Busy by Server-2017-05-31.json @@ -0,0 +1,404 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + }, + { + "name": "VAR_CEPH_CLUSTER", + "type": "constant", + "label": "ceph_cluster", + "value": "ceph", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.3.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + 
}, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util,1,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Servers Disk Utilization Peak", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All Servers by Highest Disk %Util", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "OSD Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util),$percentile),\"all disk busy @$percentile%ile\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilisation @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Node's $percentile%ile Utilisation", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + 
], + "query": "${VAR_DOMAIN}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + }, + "hide": 2, + "label": null, + "name": "ceph_cluster", + "options": [ + { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + } + ], + "query": "${VAR_CEPH_CLUSTER}", + "type": "constant" + }, + { + "allValue": null, + "current": { + "text": "95", + "value": "95" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "85", + "value": "85" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,85,90,95,98", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Disk Busy by Server", + "version": 25 +} \ No newline at end of file diff --git a/dashboards/archive/IOPS by Server-2017-05-31.json b/dashboards/archive/IOPS by Server-2017-05-31.json 
new file mode 100644 index 0000000..02de4d6 --- /dev/null +++ b/dashboards/archive/IOPS by Server-2017-05-31.json @@ -0,0 +1,363 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + }, + { + "name": "VAR_CEPH_CLUSTER", + "type": "constant", + "label": "ceph_cluster", + "value": "ceph", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.3.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.iops,1,\"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Server IOPS", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All Servers by IOPS", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "OSD Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.iops)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Total OSD IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Node's IOPS Load", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + }, + "hide": 2, + "label": null, + "name": "ceph_cluster", + "options": [ + { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + } + ], + "query": "${VAR_CEPH_CLUSTER}", + "type": "constant" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + 
"title": "IOPS by Server", + "version": 2 +} \ No newline at end of file diff --git a/dashboards/archive/Latency by Server-2017-05-31.json b/dashboards/archive/Latency by Server-2017-05-31.json new file mode 100644 index 0000000..b5465dd --- /dev/null +++ b/dashboards/archive/Latency by Server-2017-05-31.json @@ -0,0 +1,384 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + }, + { + "name": "VAR_CEPH_CLUSTER", + "type": "constant", + "label": "ceph_cluster", + "value": "ceph", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.3.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": 
"groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.await,1,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Servers - Highest Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All Servers by IOPS", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "OSD Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.await)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Latency", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Node's IOPS Load", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + }, + "hide": 2, + "label": null, + "name": "ceph_cluster", + "options": [ + { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + } + ], + "query": "${VAR_CEPH_CLUSTER}", + "type": "constant" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + }, + { + "allValue": 
null, + "current": { + "text": "95", + "value": "95" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": true, + "text": "95", + "value": "95" + } + ], + "query": "95", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Latency by Server", + "version": 1 +} \ No newline at end of file diff --git a/dashboards/cephmetrics-graphite/Dashboard Relationships.png b/dashboards/cephmetrics-graphite/Dashboard Relationships.png new file mode 100644 index 0000000..1896088 Binary files /dev/null and b/dashboards/cephmetrics-graphite/Dashboard Relationships.png differ diff --git a/dashboards/cephmetrics-graphite/alert-status.json b/dashboards/cephmetrics-graphite/alert-status.json new file mode 100644 index 0000000..21febc7 --- /dev/null +++ b/dashboards/cephmetrics-graphite/alert-status.json @@ -0,0 +1,1255 @@ +{ + "meta": { + "canSave": true, + "created": "2017-08-03T21:42:28Z", + "canStar": true, + "expires": "0001-01-01T00:00:00Z", + "updated": "2017-08-18T05:26:10Z", + "slug": "alert-status", + "version": 15, + "createdBy": "admin", + "updatedBy": "admin", + "type": "db", + "canEdit": true + }, + "dashboard": { + "style": "dark", + "rows": [ + { + "repeat": null, + "titleSize": "h6", + "collapse": false, + "title": "Dashboard Row", + "height": "250px", + "repeatRowId": null, + "panels": [ + { + "span": 12, + "stateFilter": [ + "alerting" + ], + "links": [], + "show": "current", + "title": "Active Ceph Alert List", + "onlyAlertsOnDashboard": true, + "limit": "20", + "sortOrder": 3, + "type": "alertlist", + "id": 1 + } + ], + "showTitle": false, + "repeatIteration": null + }, + { + "repeat": null, 
+ "titleSize": "h5", + "collapse": false, + "title": "Health Checks", + "height": 250, + "repeatRowId": null, + "panels": [ + { + "bars": false, + "timeFrom": null, + "links": [], + "thresholds": [ + { + "colorMode": "critical", + "line": true, + "fill": true, + "value": 0, + "op": "gt" + } + ], + "spaceLength": 10, + "nullPointMode": "null", + "renderer": "flot", + "linewidth": 2, + "steppedLine": true, + "id": 2, + "maxDataPoints": "360", + "fill": 1, + "span": 2, + "title": "Overall Ceph Health", + "tooltip": { + "sort": 1, + "shared": false, + "value_type": "individual" + }, + "targets": [ + { + "textEditor": true, + "target": "alias(maxSeries(consolidateBy(keepLastValue(transformNull(collectd.*.$domain.cephmetrics.gauge.*.mon.health,0)),\"max\")),\"Ceph Health\")", + "refId": "A" + } + ], + "yaxes": [ + { + "logBase": 1, + "format": "short", + "max": "10", + "min": "0", + "label": "", + "show": true + }, + { + "logBase": 1, + "show": false, + "max": null, + "format": "short", + "label": null, + "min": null + } + ], + "xaxis": { + "buckets": null, + "show": true, + "values": [], + "mode": "time", + "name": null + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "description": "The chart plots the clusters health, over time. 
Health is depicted as a integer; 0, 4 or 8 where 0 is OK, 4 is WARN and 8 represents an ERROR state.", + "alert": { + "noDataState": "no_data", + "name": "Overall Ceph Health", + "frequency": "10s", + "notifications": [ + { + "id": 1 + } + ], + "handler": 1, + "executionErrorState": "keep_state", + "message": "Cluster Health is not OK", + "conditions": [ + { + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "20s", + "now" + ] + }, + "evaluator": { + "type": "gt", + "params": [ + 0 + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ] + }, + "hideTimeOverride": false, + "dashLength": 10, + "stack": false, + "timeShift": null, + "aliasColors": { + "Ceph Health (0:OK, 4:Warning,8:Error)": "#DEDAF7", + "Ceph Health": "#890F02", + "ceph health": "#890F02" + }, + "lines": true, + "legend": { + "total": false, + "min": false, + "max": false, + "show": true, + "current": false, + "values": false, + "avg": false + }, + "points": false, + "datasource": "Local", + "pointradius": 5, + "minSpan": 2 + }, + { + "bars": false, + "timeFrom": null, + "links": [], + "thresholds": [ + { + "colorMode": "critical", + "line": true, + "fill": true, + "value": 0, + "op": "gt" + } + ], + "spaceLength": 10, + "nullPointMode": "null", + "renderer": "flot", + "linewidth": 1, + "steppedLine": false, + "targets": [ + { + "textEditor": true, + "hide": true, + "target": "currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.osd_percent_used),0),85)", + "refId": "A" + }, + { + "targetFull": "alias(countSeries(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.osd_percent_used),0),85)),\"OSDs Near Full\")", + "textEditor": true, + "target": "alias(countSeries(#A),\"OSDs Near Full\")", + "refId": "B" + } + ], + "fill": 1, + "span": 2, + "title": "Disks Near Full", + "tooltip": { + "sort": 0, + "shared": true, + "value_type": "individual" + }, + "id": 3, + "yaxes": [ + { + 
"logBase": 1, + "min": "0", + "max": null, + "format": "short", + "label": null, + "show": true + }, + { + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "show": false, + "label": null + } + ], + "xaxis": { + "buckets": null, + "values": [], + "mode": "time", + "name": null, + "show": true + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "description": "This shows how many disks are at or above 80% full. Performance may degrade beyond this threshold on filestore (XFS) backed OSD's.", + "alert": { + "noDataState": "ok", + "name": "Disks Near Full", + "frequency": "60s", + "notifications": [ + { + "id": 1 + } + ], + "handler": 1, + "executionErrorState": "keep_state", + "message": "DIsks Near full detected within the cluster. Warning threshold is 80% full.", + "conditions": [ + { + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "reducer": { + "type": "max", + "params": [] + }, + "type": "query" + } + ] + }, + "dashLength": 10, + "stack": false, + "timeShift": null, + "aliasColors": {}, + "lines": true, + "legend": { + "total": false, + "show": false, + "max": false, + "min": false, + "current": false, + "values": false, + "avg": false + }, + "points": false, + "datasource": "Local", + "pointradius": 5, + "minSpan": 2 + }, + { + "bars": true, + "timeFrom": "5m", + "links": [], + "thresholds": [ + { + "colorMode": "critical", + "line": true, + "op": "gt", + "value": 0, + "fill": true + } + ], + "spaceLength": 10, + "nullPointMode": "null", + "renderer": "flot", + "linewidth": 2, + "steppedLine": true, + "targets": [ + { + "textEditor": true, + "hide": true, + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd), \"max\")),\"total\")", + "refId": "A" + }, + { + "hide": true, + "textEditor": true, + "refId": "B", + "target": 
"alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd_up), \"max\")),\"up\")" + }, + { + "hide": false, + "targetFull": "alias(diffSeries(alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd), \"max\")),\"total\"),alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd_up), \"max\")),\"up\")), \"down\")", + "textEditor": true, + "refId": "C", + "target": "alias(diffSeries(#A,#B), \"down\")" + } + ], + "fill": 2, + "span": 2, + "title": "OSDs Down", + "tooltip": { + "sort": 0, + "shared": true, + "value_type": "individual" + }, + "id": 4, + "yaxes": [ + { + "logBase": 1, + "format": "short", + "max": null, + "min": "0", + "label": null, + "show": true + }, + { + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "show": false, + "label": null + } + ], + "xaxis": { + "buckets": null, + "show": true, + "values": [], + "mode": "time", + "name": null + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "description": "Count of OSDs currently in a DOWN state", + "alert": { + "noDataState": "ok", + "name": "OSDs Down", + "frequency": "10s", + "notifications": [ + { + "id": 1 + } + ], + "handler": 1, + "executionErrorState": "alerting", + "message": "OSD Down event", + "conditions": [ + { + "operator": { + "type": "and" + }, + "query": { + "params": [ + "C", + "30s", + "now" + ] + }, + "evaluator": { + "type": "gt", + "params": [ + 0 + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ] + }, + "hideTimeOverride": true, + "dashLength": 10, + "stack": false, + "timeShift": null, + "aliasColors": {}, + "lines": false, + "legend": { + "rightSide": false, + "total": false, + "min": false, + "max": false, + "show": false, + "current": false, + "values": false, + "alignAsTable": false, + "avg": false, + "hideZero": false + }, + "points": false, + "datasource": "Local", + 
"pointradius": 5, + "minSpan": 2 + }, + { + "bars": false, + "timeFrom": null, + "links": [], + "thresholds": [ + { + "colorMode": "critical", + "line": true, + "op": "gt", + "value": 85, + "fill": true + } + ], + "spaceLength": 10, + "nullPointMode": "null", + "renderer": "flot", + "stack": false, + "linewidth": 1, + "steppedLine": false, + "targets": [ + { + "hide": true, + "textEditor": true, + "refId": "A", + "target": "alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes,1, \"maxSeries\")), \"Raw Capacity\")" + }, + { + "hide": true, + "textEditor": true, + "refId": "B", + "target": "alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_used,1, \"maxSeries\")), \"Used Raw\")" + }, + { + "targetFull": "alias(asPercent(alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_used,1, \"maxSeries\")), \"Used Raw\"), alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes,1, \"maxSeries\")), \"Raw Capacity\")), \"Raw Capacity Used %\")", + "textEditor": true, + "target": "alias(asPercent(#B, #A), \"Raw Capacity Used %\")", + "refId": "C" + } + ], + "fill": 1, + "span": 2, + "title": "Cluster Capacity", + "tooltip": { + "sort": 0, + "shared": true, + "value_type": "individual" + }, + "id": 5, + "points": false, + "xaxis": { + "buckets": null, + "show": true, + "values": [], + "mode": "time", + "name": null + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "description": "This trigger raises a notification if the raw used crosses the 85% capacity threshold of the ceph cluster", + "alert": { + "noDataState": "keep_state", + "name": "Cluster Capacity", + "frequency": "60s", + "notifications": [ + { + "id": 1 + } + ], + "handler": 1, + "executionErrorState": "alerting", + "message": "Cluster Capacity Limit Warning", + "conditions": [ + { + "operator": { + "type": "and" + }, + "query": { + "params": [ + "C", + "1h", + "now" + 
] + }, + "evaluator": { + "params": [ + 85 + ], + "type": "gt" + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ] + }, + "dashLength": 10, + "legend": { + "total": false, + "min": false, + "max": false, + "show": true, + "current": false, + "values": false, + "avg": false + }, + "timeShift": null, + "aliasColors": {}, + "lines": true, + "yaxes": [ + { + "logBase": 1, + "min": "0", + "max": "100", + "format": "percent", + "show": true, + "label": "" + }, + { + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "show": false, + "label": null + } + ], + "datasource": "Local", + "pointradius": 5, + "minSpan": 2 + }, + { + "bars": false, + "timeFrom": "6h", + "links": [], + "thresholds": [ + { + "colorMode": "critical", + "line": true, + "fill": true, + "value": 0, + "op": "gt" + } + ], + "spaceLength": 10, + "nullPointMode": "null", + "renderer": "flot", + "linewidth": 2, + "steppedLine": false, + "targets": [ + { + "textEditor": true, + "refId": "A", + "target": "alias(maxSeries(consolidateBy(collectd.*.$domain.cephmetrics.gauge.*.mon.num_pgs_stuck, \"maxSeries\")), \"# pg's stuck inactive\")" + } + ], + "fill": 2, + "span": 2, + "title": "PG's Stuck", + "tooltip": { + "sort": 0, + "shared": false, + "value_type": "individual" + }, + "id": 8, + "yaxes": [ + { + "logBase": 1, + "min": "0", + "max": null, + "format": "short", + "show": true, + "label": null + }, + { + "logBase": 1, + "show": false, + "max": null, + "format": "short", + "min": null, + "label": null + } + ], + "xaxis": { + "buckets": null, + "show": true, + "values": [ + "total" + ], + "mode": "time", + "name": null + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "description": "This chart shows whether there are pg's in a stuck state, that need manual intervention to resolve.", + "alert": { + "noDataState": "no_data", + "name": "PG's Stuck", + "frequency": "60s", + "notifications": [ + { + "id": 1 + } + ], + 
"handler": 1, + "executionErrorState": "alerting", + "message": "PG's stuck inactive", + "conditions": [ + { + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "reducer": { + "type": "last", + "params": [] + }, + "type": "query" + } + ] + }, + "hideTimeOverride": true, + "dashLength": 10, + "stack": false, + "timeShift": null, + "aliasColors": {}, + "lines": true, + "legend": { + "total": false, + "min": false, + "max": false, + "show": true, + "current": false, + "values": false, + "avg": false + }, + "points": false, + "datasource": "Local", + "pointradius": 5, + "minSpan": 2 + }, + { + "bars": false, + "timeFrom": null, + "links": [], + "thresholds": [ + { + "colorMode": "critical", + "line": true, + "op": "lt", + "value": 0, + "fill": true + } + ], + "spaceLength": 10, + "nullPointMode": "null", + "renderer": "flot", + "linewidth": 1, + "steppedLine": false, + "targets": [ + { + "hide": true, + "textEditor": true, + "refId": "A", + "target": "alias(scale(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_avail,1, \"maxSeries\")),0.9), \"Raw Freespace\")" + }, + { + "textEditor": true, + "hide": true, + "target": "alias(maxSeries(groupByNode(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.stat_bytes),1,\"sumSeries\")), \"Largest OSD Host\")", + "refId": "B" + }, + { + "targetFull": "alias(diffSeries(alias(scale(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_avail,1, \"maxSeries\")),0.9), \"Raw Freespace\"),alias(maxSeries(groupByNode(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.stat_bytes),1,\"sumSeries\")), \"Largest OSD Host\")),\"freespace after Node loss\")", + "textEditor": true, + "refId": "C", + "target": "alias(diffSeries(#A,#B),\"freespace after Node loss\")" + } + ], + "fill": 1, + "span": 2, + "title": "OSD Host Loss Check", + "tooltip": { + "sort": 0, + "shared": true, 
+ "value_type": "individual" + }, + "id": 9, + "yaxes": [ + { + "logBase": 1, + "show": true, + "max": null, + "format": "decbytes", + "min": "0", + "label": null + }, + { + "logBase": 1, + "show": true, + "max": null, + "format": "short", + "min": null, + "label": null + } + ], + "xaxis": { + "buckets": null, + "show": true, + "values": [], + "mode": "time", + "name": null + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "description": "This graph checks the cluster @ 90% full is enough to support the loss of the largest OSD host", + "alert": { + "noDataState": "ok", + "name": "OSD Host Loss Check", + "frequency": "60s", + "notifications": [ + { + "id": 1 + } + ], + "handler": 1, + "executionErrorState": "alerting", + "message": "OSD Host Loss Free Space Check Failed", + "conditions": [ + { + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "evaluator": { + "type": "lt", + "params": [ + 0 + ] + }, + "reducer": { + "type": "min", + "params": [] + }, + "type": "query" + } + ] + }, + "dashLength": 10, + "stack": false, + "timeShift": null, + "aliasColors": { + "Largest OSD Host": "#890F02" + }, + "lines": true, + "legend": { + "total": false, + "show": false, + "max": false, + "min": false, + "current": false, + "values": false, + "avg": false + }, + "points": false, + "datasource": "Local", + "pointradius": 5, + "minSpan": 2 + }, + { + "bars": false, + "timeFrom": "1h", + "links": [], + "thresholds": [ + { + "colorMode": "critical", + "line": true, + "fill": true, + "value": 1000, + "op": "gt" + } + ], + "spaceLength": 10, + "nullPointMode": "null", + "renderer": "flot", + "stack": false, + "linewidth": 1, + "steppedLine": false, + "targets": [ + { + "textEditor": true, + "refId": "A", + "target": "aliasByNode(currentAbove(keepLastValue(transformNull(collectd.*.$domain.cephmetrics.gauge.*.osd.*.perf.await,-1)),1000),1,-3)" + } + ], + "maxDataPoints": "", + "fill": 1, + "span": 
2, + "title": "Slow OSD responses", + "tooltip": { + "sort": 0, + "shared": true, + "value_type": "individual" + }, + "id": 10, + "yaxes": [ + { + "logBase": 1, + "min": "0", + "max": null, + "format": "none", + "label": "ms", + "show": true + }, + { + "logBase": 1, + "format": "short", + "max": null, + "min": null, + "label": null, + "show": false + } + ], + "xaxis": { + "buckets": null, + "show": true, + "values": [], + "mode": "time", + "name": null + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "description": "Graph checking for OSD Latencies that are above 1s.", + "alert": { + "noDataState": "ok", + "name": "Slow OSD responses alert", + "frequency": "30s", + "notifications": [ + { + "id": 1 + } + ], + "handler": 1, + "executionErrorState": "alerting", + "message": "OSD Response time is > 1s", + "conditions": [ + { + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "evaluator": { + "type": "gt", + "params": [ + 1000 + ] + }, + "reducer": { + "type": "max", + "params": [] + }, + "type": "query" + } + ] + }, + "hideTimeOverride": true, + "dashLength": 10, + "legend": { + "total": false, + "show": true, + "max": false, + "min": false, + "current": false, + "values": false, + "avg": false + }, + "timeShift": null, + "aliasColors": { + "Largest OSD Host": "#890F02" + }, + "lines": true, + "points": false, + "datasource": "Local", + "pointradius": 5, + "minSpan": 2 + }, + { + "bars": false, + "timeFrom": null, + "links": [], + "thresholds": [ + { + "colorMode": "critical", + "line": true, + "op": "gt", + "value": 10, + "fill": true + } + ], + "spaceLength": 10, + "nullPointMode": "null", + "renderer": "flot", + "linewidth": 1, + "steppedLine": false, + "id": 11, + "fill": 1, + "span": 2, + "title": "Network Errors", + "tooltip": { + "sort": 0, + "shared": true, + "value_type": "individual" + }, + "targets": [ + { + "textEditor": true, + "target": 
"groupByNode(collectd.*.$domain.interface.*.if_{dropped,errors}.*,1,\"sumSeries\")", + "refId": "A" + } + ], + "points": false, + "xaxis": { + "buckets": null, + "show": true, + "values": [], + "mode": "time", + "name": null + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "description": "Checks all interfaces for dropped/error packets, and alerts if more than 10 are seen in a 5m interval", + "alert": { + "noDataState": "no_data", + "name": "Network Errors alert", + "frequency": "30s", + "notifications": [ + { + "id": 1 + } + ], + "handler": 1, + "executionErrorState": "keep_state", + "message": "Network rx/tx issues detected", + "conditions": [ + { + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "evaluator": { + "type": "gt", + "params": [ + 10 + ] + }, + "reducer": { + "type": "max", + "params": [] + }, + "type": "query" + } + ] + }, + "dashLength": 10, + "stack": false, + "timeShift": null, + "aliasColors": {}, + "lines": true, + "legend": { + "total": false, + "min": false, + "max": false, + "show": false, + "current": false, + "values": false, + "avg": false + }, + "yaxes": [ + { + "logBase": 1, + "format": "none", + "max": null, + "min": "0", + "label": null, + "show": true + }, + { + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "show": false, + "label": null + } + ], + "datasource": null, + "pointradius": 5, + "minSpan": 2 + }, + { + "bars": false, + "timeFrom": null, + "links": [], + "thresholds": [ + { + "colorMode": "critical", + "line": true, + "fill": true, + "value": 85, + "op": "gt" + } + ], + "spaceLength": 10, + "nullPointMode": "null", + "renderer": "flot", + "linewidth": 2, + "steppedLine": false, + "targets": [ + { + "textEditor": true, + "refId": "A", + "target": "groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.pools.*.percent_used,-2,'maxSeries')" + } + ], + "fill": 5, + "span": 2, + "title": "Pool Capacity", + "tooltip": { 
+ "sort": 0, + "shared": true, + "value_type": "individual" + }, + "id": 12, + "points": false, + "xaxis": { + "buckets": null, + "values": [], + "mode": "time", + "name": null, + "show": true + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "repeat": null, + "alert": { + "noDataState": "keep_state", + "name": "Pool Capacity", + "frequency": "60s", + "notifications": [], + "handler": 1, + "executionErrorState": "alerting", + "conditions": [ + { + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "evaluator": { + "params": [ + 85 + ], + "type": "gt" + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ] + }, + "dashLength": 10, + "stack": false, + "timeShift": null, + "aliasColors": {}, + "lines": true, + "legend": { + "avg": false, + "min": false, + "max": false, + "show": true, + "current": false, + "values": false, + "total": false + }, + "yaxes": [ + { + "logBase": 1, + "format": "percent", + "max": null, + "min": null, + "label": null, + "show": true + }, + { + "logBase": 1, + "show": true, + "max": null, + "format": "short", + "label": null, + "min": null + } + ], + "datasource": "Local", + "pointradius": 5, + "minSpan": 2 + } + ], + "showTitle": true, + "repeatIteration": null + } + ], + "templating": { + "list": [] + }, + "links": [], + "tags": [], + "graphTooltip": 0, + "hideControls": true, + "title": "Alert Status", + "editable": false, + "refresh": "10s", + "annotations": { + "list": [] + }, + "gnetId": null, + "version": 15, + "time": { + "to": "now", + "from": "now-1h" + }, + "timezone": "browser", + "schemaVersion": 14, + "timepicker": { + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ], + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "id": 24 + } +} diff --git a/dashboards/cephmetrics-graphite/ceph-at-a-glance.json 
b/dashboards/cephmetrics-graphite/ceph-at-a-glance.json new file mode 100644 index 0000000..ac2bc3b --- /dev/null +++ b/dashboards/cephmetrics-graphite/ceph-at-a-glance.json @@ -0,0 +1,3062 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 64, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "145", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "50px", + "id": 86, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Cluster", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": 
"35%", + "valueMaps": [ + { + "op": "=", + "text": "Cluster", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "50px", + "id": 82, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "Pools", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "50px", + "id": 91, + "interval": null, + "links": [ + { + 
"dashUri": "db/ceph-osd-information", + "dashboard": "Ceph OSD Information", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Information", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "OSDs", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "50px", + "id": 84, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-rgw-workload", + "dashboard": "Ceph - RGW Workload", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph RADOSGW Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + 
"prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "S3/Swift", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "50px", + "id": 83, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "OSD Hosts", + "value": "null" 
+ } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "50px", + "id": 85, + "interval": null, + "links": [ + { + "dashUri": "db/network-usage-by-node", + "dashboard": "Network Usage by Node", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Network Usage by Host", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "Network", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "", + "id": 95, + "links": [], + "minSpan": 4, + "mode": "markdown", + "span": 4, + "title": "", + "transparent": true, + "type": "text" + }, + { + "id": 94, + "limit": 10, + "links": [ + { + "dashUri": "db/alert-status", + "dashboard": "Alert Status", + "targetBlank": true, + "title": "Alert Status", + "type": "dashboard" + } + ], + "minSpan": 2, + "onlyAlertsOnDashboard": false, + "show": "current", + "sortOrder": 3, + "span": 2, + "stateFilter": [ + 
"alerting" + ], + "title": "Active Alerts", + "type": "alertlist" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "225", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(1, 167, 1, 1)", + "rgba(255,165,0, 1)", + "rgba(255, 0, 0, 1)" + ], + "datasource": "Local", + "description": "Shows the overall health of the ceph cluster", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "hideTimeOverride": true, + "id": 28, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-health", + "dashboard": "Ceph Health", + "includeVars": true, + "keepTime": false, + "targetBlank": true, + "title": "Ceph Health", + "type": "dashboard" + } + ], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "0", + "text": "OK", + "to": "1" + }, + { + "from": "1", + "text": "WARN", + "to": "4" + }, + { + "from": "5", + "text": "ERROR", + "to": "99" + }, + { + "from": "-10", + "text": "NODATA", + "to": "0" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.health),\"max\")", + "textEditor": true + } + ], + "thresholds": "1,5", + "timeFrom": "1m", + "timeShift": null, + "title": "Health", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ 
+ { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "WARN", + "value": "4" + }, + { + "op": "=", + "text": "ERROR", + "value": "8" + } + ], + "valueName": "current" + }, + { + "clusterName": "MONs", + "displayName": "MONs", + "flipCard": false, + "flipTime": 5, + "hideTimeOverride": true, + "id": 75, + "isGrayOnNoData": true, + "links": [], + "minSpan": 1, + "namePrefix": "", + "span": 1, + "targets": [ + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "refId": "D", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon), \"max\")),\"total\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "quorum", + "display": true, + "displayType": "Regular", + "refId": "E", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon_quorum), \"max\")),\"quorum\")", + "textEditor": true, + "valueHandler": "Threshold", + "warn": "1" + }, + { + "aggregation": "Last", + "alias": "down", + "display": true, + "displayType": "Regular", + "refId": "A", + "target": "alias(diffSeries(#D,#E), \"down\")", + "targetFull": "alias(diffSeries(alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon), \"max\")),\"total\"),alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon_quorum), \"max\")),\"quorum\")), \"down\")", + "textEditor": true, + "valueHandler": "Threshold", + "warn": "1" + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "", + "type": "vonage-status-panel" + }, + { + "clusterName": "OSDs", + "displayName": "OSDs", + "flipCard": false, + "flipTime": 5, + "hideTimeOverride": true, + "id": 65, + "links": [ + { + "dashUri": "db/ceph-osd-information", + "dashboard": "Ceph OSD Information", + 
"targetBlank": true, + "title": "Ceph OSD Information", + "type": "dashboard" + } + ], + "minSpan": 1, + "namePrefix": "", + "span": 1, + "targets": [ + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "hide": false, + "refId": "E", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd), \"max\")),\"total\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "in", + "displayType": "Regular", + "hide": false, + "refId": "F", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_up), \"max\")),\"up\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "up", + "displayType": "Regular", + "hide": false, + "refId": "A", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_in), \"max\")),\"in\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "out", + "displayType": "Regular", + "refId": "C", + "target": "alias(countSeries(currentBelow(groupByNode(transformNull(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_state.*.in,99),-2,\"maxSeries\"),0.5)),\"out\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "down", + "crit": "5", + "display": true, + "displayType": "Regular", + "refId": "B", + "target": "alias(countSeries(currentBelow(groupByNode(transformNull(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_state.*.up,99),-2,\"maxSeries\"),0.5)),\"down\")", + "textEditor": true, + "valueHandler": "Threshold", + "warn": "1" + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "", + "type": "vonage-status-panel" + }, + { + "clusterName": "OSD Hosts", + "displayName": "OSD Hosts", + "flipTime": 5, + 
"hideTimeOverride": true, + "id": 100, + "links": [], + "minSpan": 1, + "span": 1, + "targets": [ + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "refId": "A", + "target": "alias(countSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.num_osds),\"total\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "up", + "displayType": "Regular", + "refId": "B", + "target": "alias(countSeries(currentAbove(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.num_osds,0)),\"up\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "down", + "crit": "2", + "display": true, + "displayType": "Regular", + "refId": "C", + "target": "alias(diffSeries(#A, #B), \"down\")", + "targetFull": "alias(diffSeries(alias(countSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.num_osds),\"total\"), alias(countSeries(currentAbove(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.num_osds,0)),\"up\")), \"down\")", + "textEditor": true, + "valueHandler": "Threshold", + "warn": "1" + } + ], + "timeFrom": "30s", + "timeShift": null, + "title": "", + "type": "vonage-status-panel" + }, + { + "clusterName": "RGWs", + "displayName": "RGWs", + "flipTime": 5, + "hideTimeOverride": true, + "id": 101, + "isGrayOnNoData": false, + "links": [], + "span": 1, + "targets": [ + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "hide": true, + "refId": "A", + "target": "constantLine(0)", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "displayType": "Regular", + "hide": true, + "refId": "B", + "target": "countSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put)", + "textEditor": true, + "valueHandler": "Regular" + }, + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "hide": false, + "refId": "C", + "target": 
"alias(limit(sortByMaxima(group(#A,#B)),1),\"total\")", + "targetFull": "alias(limit(sortByMaxima(group(constantLine(0),countSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put))),1),\"total\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "displayType": "Regular", + "hide": true, + "refId": "D", + "target": "countSeries(currentAbove(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0))", + "textEditor": true, + "valueHandler": "Regular" + }, + { + "aggregation": "Last", + "alias": "up", + "displayType": "Regular", + "hide": false, + "refId": "E", + "target": "alias(limit(sortByMaxima(group(#A,#D)),1), \"up\")", + "targetFull": "alias(limit(sortByMaxima(group(constantLine(0),countSeries(currentAbove(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0)))),1), \"up\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "down", + "crit": "6", + "display": true, + "displayType": "Regular", + "hide": false, + "refId": "F", + "target": "alias(diffSeries(#B, #D),\"down\")", + "targetFull": "alias(diffSeries(countSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put), countSeries(currentAbove(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0))),\"down\")", + "textEditor": true, + "valueHandler": "Threshold", + "warn": "1" + } + ], + "timeFrom": "30s", + "timeShift": null, + "title": "", + "transparent": false, + "type": "vonage-status-panel" + }, + { + "clusterName": "MDS", + "displayName": "MDS", + "flipCard": false, + "flipTime": 5, + "hideTimeOverride": true, + "id": 99, + "links": [], + "minSpan": 1, + "namePrefix": "", + "span": 1, + "targets": [ + { + "aggregation": "Last", + "alias": "up", + "displayType": "Regular", + "hide": false, + "refId": "A", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mds_up), \"max\")),\"up\")", + "textEditor": true, + "valueHandler": 
"Text Only" + }, + { + "aggregation": "Last", + "alias": "in", + "displayType": "Regular", + "hide": false, + "refId": "E", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mds_in), \"max\")),\"in\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "down", + "crit": "2", + "display": true, + "displayType": "Regular", + "hide": false, + "refId": "B", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mds_failed), \"max\")),\"down\")", + "textEditor": true, + "valueHandler": "Threshold", + "warn": "1" + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "", + "type": "vonage-status-panel" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(1, 167, 1, 1)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Local", + "decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "hideTimeOverride": true, + "id": 38, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "params": "panelId=3&fullscreen&orgId=1", + "targetBlank": true, + "title": "Cluster Capacity Information", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "%", + "postfixFontSize": "40%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": 
false + }, + "tableColumn": "", + "targets": [ + { + "hide": true, + "refId": "A", + "target": "alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 1,\"maxSeries\")), \"raw capacity\")", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 1,\"maxSeries\")), \"used raw\")", + "textEditor": true + }, + { + "refId": "C", + "target": "asPercent(#B,#A)", + "targetFull": "asPercent(alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 1,\"maxSeries\")), \"used raw\"),alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 1,\"maxSeries\")), \"raw capacity\"))", + "textEditor": true + } + ], + "thresholds": "70,90", + "timeFrom": "1m", + "timeShift": null, + "title": "Capacity Utilization", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "decimals": 1, + "description": "Shows the growth rate based on osd usage over the past $growth_window.", + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 80, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": 
"null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "hide": true, + "refId": "D", + "target": "alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), \"max\"),\"-$growth_window ago\")", + "textEditor": true + }, + { + "hide": true, + "refId": "E", + "target": "alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(diffSeries(#E, #D), 'Growth over last $growth_window')", + "targetFull": "alias(diffSeries(alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\"), alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), \"max\"),\"-$growth_window ago\")), 'Growth over last $growth_window')", + "textEditor": true + } + ], + "thresholds": "", + "title": "$growth_window Growth Rate", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "decimals": 0, + "description": "Shows the estimated number of weeks left, based on consumption over the past $growth_window.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 81, + "interval": null, + "links": [], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": 
"range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + }, + { + "from": "-99999", + "text": "N/A", + "to": "0" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "hide": true, + "refId": "A", + "target": "alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), \"max\"),\"-$growth_window ago\")", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\")", + "textEditor": true + }, + { + "hide": true, + "refId": "C", + "target": "keepLastValue(diffSeries(#B, #A))", + "targetFull": "keepLastValue(diffSeries(alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\"), alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), \"max\"),\"-$growth_window ago\")))", + "textEditor": true + }, + { + "hide": true, + "refId": "D", + "target": "consolidateBy(minSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_avail), \"min\")", + "textEditor": true + }, + { + "hide": false, + "refId": "E", + "target": "alias(divideSeries(#D, #C), 'Weeks Left till full')", + "targetFull": "alias(divideSeries(consolidateBy(minSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_avail), \"min\"), 
keepLastValue(diffSeries(alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\"), alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), \"max\"),\"-$growth_window ago\")))), 'Weeks Left till full')", + "textEditor": true + } + ], + "thresholds": "", + "title": "Weeks Remaining", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "active + clean": "#01a701", + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_active_clean": "#01a701", + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_peering": "#ffa500", + "peering": "#0A50A1" + }, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": "" + }, + "datasource": "Local", + "fontSize": "100%", + "format": "none", + "height": "210", + "hideTimeOverride": true, + "id": 74, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": false + }, + "legendType": "Under graph", + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "includeVars": false, + "keepTime": false, + "targetBlank": true, + "title": "Ceph Cluster Information", + "type": "dashboard" + } + ], + "maxDataPoints": "1", + "minSpan": 2, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": "0", + "targets": [ + { + "refId": "A", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean), \"max\")),\"active + clean\")", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active), \"max\")),\"active\")", + "textEditor": true + }, 
+ { + "refId": "C", + "target": "alias(diffSeries(#B,#A),\"active + degraded\")", + "targetFull": "alias(diffSeries(alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active), \"max\")),\"active\"),alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean), \"max\")),\"active + clean\")),\"active + degraded\")", + "textEditor": true + }, + { + "hide": false, + "refId": "D", + "target": "alias(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_peering), \"max\"),\"peering\")", + "textEditor": true + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "Placement Group Status", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "At a Glance", + "titleSize": "h5" + }, + { + "collapse": false, + "height": "230", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(251,251,251, 0.97)", + "rgba(255,0,0,1)", + "rgba(255, 0, 0, 1)" + ], + "datasource": "Local", + "description": "This panel indicate whether scrub/deep scrub is running within the cluster. NB. If either of these features are turned off, the cluster will enter a WARN state. 
Click on the panel or the link below to look at cluster information in more detail", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 97, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "includeVars": false, + "keepTime": false, + "targetBlank": true, + "title": "Ceph Cluster", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "hide": true, + "refId": "A", + "target": "consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.deep_scrub),\"max\")", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.scrub),\"max\")", + "textEditor": true + }, + { + "refId": "C", + "target": "maxSeries(group(#A, #B))", + "targetFull": "maxSeries(group(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.deep_scrub),\"max\"), consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.scrub),\"max\")))", + "textEditor": true + } + ], + "thresholds": "2", + "title": "Scrub", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "INACTIVE", + "value": "0" + }, + { + "op": 
"=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(251,251,251, 0.97)", + "rgba(255,165,0, 0.89)", + "rgba(255, 0, 0, 1)" + ], + "datasource": "Local", + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 96, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "/s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.recovering_bytes_per_sec,-2,\"avg\"))", + "textEditor": true + } + ], + "thresholds": "1,2", + "title": "Recovery", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + 
"minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": false, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.op_per_sec,-2,\"maxSeries\"))", + "textEditor": true + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "decimals": 1, + "format": "decbytes", + 
"gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "/s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": false, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.bytes_sec,-2,\"maxSeries\"))", + "textEditor": true + } + ], + "thresholds": "", + "title": "Client Throughput", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + 
"datasource": "Local", + "decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 76, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_pool)", + "textEditor": true + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 77, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], 
+ "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "B", + "target": "alias(sumSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_rbds,6),\"max\")),\"# rbds\")", + "textEditor": true + } + ], + "thresholds": "", + "title": "RBDs", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [], + "valueName": "current" + }, + { + "aliasColors": { + "Reads": "#01a701", + "Writes": "#82B5D8" + }, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "Local", + "description": "Shows the read/write threshold of client IOPS serviced by the ceph cluster", + "fontSize": "80%", + "format": "none", + "height": "230", + "id": 93, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": false + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": 1, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.read_op_per_sec,-2,\"maxSeries\")), \"Reads\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.write_op_per_sec,-2,\"maxSeries\")), \"Writes\")", + "textEditor": true + } + ], + "title": "Client Read/Write Ratio", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + "95%ile Commit Latency": "#447EBC", + "Apply Latency Max": 
"#890F02", + "Commit Latency": "#447EBC" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "Shows the OSD apply and commit latency at the $percentile%ile across the cluster over the past 15 minutes", + "fill": 0, + "hideTimeOverride": true, + "id": 92, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "dashUri": "db/ceph-osd-information", + "dashboard": "Ceph OSD Information", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Information", + "type": "dashboard" + } + ], + "minSpan": 2, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.apply_latency,$percentile), \"Apply Latency\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.commitcycle_latency, $percentile), \"Commit Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": "15m", + "timeShift": null, + "title": "OSD Apply vs Commit Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + 
"repeatRowId": null, + "showTitle": false, + "title": "Performance", + "titleSize": "h5" + }, + { + "collapse": false, + "height": "230px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Local", + "decimals": 0, + "description": "CPU usage is presented based on the $percentile%ile across all OSD hosts", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 59, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(groupByNode(collectd.$osd_servers.$domain.cpu.percent.{system,user,wait},1,\"sumSeries\"),$percentile)", + "textEditor": true + } + ], + "thresholds": "70,90", + "title": "OSD Hosts CPU Busy", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "decimals": 0, + "description": "Total IOPS from all OSDs in the cluster", + "format": "none", + "gauge": { + "maxValue": 100, + 
"minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 72, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": false, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"IOPS\")", + "textEditor": true + } + ], + "thresholds": "", + "title": "Disk IOPS", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": 
"Local", + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 73, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "/s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": false, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.bytes_per_sec)", + "textEditor": true + } + ], + "thresholds": "", + "title": "Disk Throughput", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", 
+ "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "decimals": 0, + "description": "The count of the number of disks in the cluster that are over $disk_full_threshold% full.", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 79, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "countSeries(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.osd_percent_used),0),$disk_full_threshold))", + "textEditor": true + } + ], + "thresholds": "", + "title": "Nearly Full Disks", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "decimals": 0, + "description": "RAM Usage shows the $percentile%ile of RAM used across all OSD hosts", + "format": "short", + "gauge": { + 
"maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 78, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(46, 161, 15, 0)", + "full": false, + "lineColor": "rgb(164, 139, 4)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": false, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "percentileOfSeries(collectd.$osd_servers.$domain.memory.percent.used,$percentile)", + "textEditor": true + } + ], + "thresholds": "", + "title": "RAM Util.", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(1, 167, 1,1)", + "rgba(255,165,0,1)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Local", + "decimals": 0, + 
"description": "This panel shows the $percentile%ile disk await time across all OSDs in the cluster", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 36, + "interval": null, + "links": [ + { + "dashUri": "db/latency-by-server", + "dashboard": "Latency by Server", + "includeVars": true, + "targetBlank": true, + "title": "Latency by Server", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": " ms", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),$percentile)", + "textEditor": true + } + ], + "thresholds": "20,60", + "title": "Disk Latency", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "average %util": "#1f78c1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "Chart shows the disk utilization over the past 15 mins expressed as an average across all OSDs, and at the $percentile%ile.", + "fill": 1, + "hideTimeOverride": true, + "id": 98, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + { +
"dashUri": "db/disk-busy-by-server", + "dashboard": "Disk Busy by Server", + "includeVars": true, + "targetBlank": true, + "title": "Disk Busy by Server", + "type": "dashboard" + } + ], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "%util @ 95%ile", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(averageSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util), 'average %util')", + "textEditor": false + }, + { + "refId": "B", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util, $percentile, 'false'), '%util @ $percentile%ile')" + } + ], + "thresholds": [], + "timeFrom": "15m", + "timeShift": null, + "title": "Disk Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 0, + "includeAll": false, + "label": "Cluster Name", + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": 
"", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "85", + "value": "85" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,85,90,95,98", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "obj-rgw-1", + "value": "obj-rgw-1" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": true, + "name": "rgw_servers", + "options": [ + { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + ], + "query": "obj-rgw-1", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Local", + "hide": 2, + "includeAll": 
true, + "label": null, + "multi": true, + "name": "mon_servers", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.mon_status.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "85", + "value": "85" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "disk_full_threshold", + "options": [ + { + "selected": true, + "text": "85", + "value": "85" + } + ], + "query": "85", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "7d", + "value": "7d" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "growth_window", + "options": [ + { + "selected": true, + "text": "7d", + "value": "7d" + } + ], + "query": "7d", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph - At A Glance", + "version": 3 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-10-11T02:45:33Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "isStarred": true, + "slug": "ceph-at-a-glance", + "type": "db", + "updated": "2017-10-11T02:51:00Z", + "updatedBy": "admin@localhost", + "version": 3 + } +} \ No newline at end of file diff --git a/dashboards/cephmetrics-graphite/ceph-backend-storage.json b/dashboards/cephmetrics-graphite/ceph-backend-storage.json new file mode 100644 index 0000000..f0e7a81 --- /dev/null +++ b/dashboards/cephmetrics-graphite/ceph-backend-storage.json @@ -0,0 +1,1276 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + 
"gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 13, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "fontSize": "100%", + "hideTimeOverride": true, + "id": 56, + "links": [], + "minSpan": 1, + "pageSize": 100, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 1, + "styles": [ + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Metric", + "sanitize": false, + "thresholds": [ + "" + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Status", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [ + "0" + ], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "hide": false, + "refId": "B", + "target": "currentBelow(groupByNode(transformNull(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_state.*.up,99),-2,\"maxSeries\"),0.5)", + "textEditor": true + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "OSD's Down", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "fontSize": "100%", + "id": 57, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD 
HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "% Full", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Host and Disk", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "% Full", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "Host.OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.osd_percent_used),0),$disk_full_threshold),1,-2)", + "textEditor": true + } + ], + "title": "Disks Near Full", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "fontSize": "100%", + "id": 58, + "links": [], + "maxDataPoints": "1", + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 3, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Total OSD Capacity", + "colorMode": null, + "colors": 
[ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "decbytes" + }, + { + "alias": "Host Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "groupByNode(keepLastValue(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),1,\"sumSeries\")", + "textEditor": true + } + ], + "title": "Host OSD Raw Capacity", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "fontSize": "100%", + "id": 59, + "links": [], + "maxDataPoints": "10", + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 3, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "# OSDs", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "Host Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + 
"pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(keepLastValue(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.num_osds,6),1)", + "textEditor": true + } + ], + "title": "Host OSD Capacity Breakdown", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk/OSD Host Summary", + "titleSize": "h5" + }, + { + "collapse": true, + "height": "300", + "panels": [ + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateRdYlGn", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "description": "The heatmap categorizes disk utilization into discrete buckets (e.g util 0-5) and shows the frequency of the number of disks that fall within that range as a color. The color chosen depends on the number of disks in the 'bucket', ranging from green (low) to red (high). 
Hover over a colored block to show the count of disk utilization observations at that point.", + "heatmap": {}, + "highlightCards": true, + "id": 53, + "links": [], + "minSpan": 6, + "span": 6, + "targets": [ + { + "refId": "A", + "target": "collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util", + "textEditor": true + } + ], + "title": "Disk Drive Utilization Heatmap - $osd_servers", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": 180, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "short", + "logBase": 1, + "max": "100", + "min": "0", + "show": true, + "splitFactor": null + }, + "yBucketNumber": null, + "yBucketSize": 5 + }, + { + "aliasColors": { + "Read Throughput": "#629E51", + "Write Throughput": "#E0752D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_bytes_per_sec), \"Read Throughput\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_bytes_per_sec), \"Write Throughput\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Throughput - $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Read Latency": "#629E51", + "Write Latency": "#E0752D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 40, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/latency-by-server", + "dashboard": "Latency by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Latency by Server", + "type": "dashboard" + } + ], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_await),$percentile), \"Read Latency\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_await),$percentile), \"Write Latency\")", + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "custom", + "fill": false, + "line": true, + "lineColor": "rgba(178, 0, 0, 0.29)", + "op": "gt", + "value": 50 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Latency - $osd_servers OSDs @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": 
null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "disk busy %": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 43, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/disk-busy-by-server", + "dashboard": "Disk Busy by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Disk Busy by Server", + "type": "dashboard" + } + ], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util),$percentile), \"disk busy %\")", + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "custom", + "fill": false, + "line": true, + "lineColor": "rgba(178, 0, 0, 0.29)", + "op": "gt", + "value": 80 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Utilization - $osd_servers OSDs at $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + 
"aliasColors": { + "IOPS/spindle": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),$percentile),\"IOPS/spindle\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS per Disk @ $percentile%ile - $osd_servers OSDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "IOPS": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/iops-by-server", + "dashboard": "IOPS by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "IOPS by Server", + "type": "dashboard" + } + ], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": 
false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"IOPS\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Disk IOPS - $osd_servers OSDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk/OSD Load Summary", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": { + "CPU Busy": "#447EBC", + "CPU Busy @ 95%ile": "#890F02", + "Cluster-wide CPU Busy @ 95%ile": "#890F02", + "Max CPU Busy": "#BF1B00", + "Max CPU Busy - all OSD Hosts": "#BF1B00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 3, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Cluster-wide CPU Busy @ 95%ile", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": 
"alias(percentileOfSeries(groupByNode(collectd.*.$domain.cpu.percent.{wait,interrupt,nice,user,system}, 1, \"sumSeries\"),$percentile),\"Cluster-wide CPU Busy @ $percentile%ile\")", + "textEditor": true + }, + { + "hide": false, + "refId": "B", + "target": "alias(averageSeries(groupByNode(collectd.$osd_servers.$domain.cpu.percent.{wait,interrupt,nice,user,system}, 1, \"sumSeries\")),\"Average OSD Host(s) CPU Busy\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilization - $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Network load (rx+tx)": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 49, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/network-usage-by-node", + "dashboard": "Network Usage by Node", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Network Load by Node", + "type": "dashboard" + } + ], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "hide": true, + "refId": "A", + "target": "collectd.$osd_servers.$domain.interface.{en,eth,bond}*.if_octets.rx" + }, + { + "hide": true, + "refId": "B", + "target": 
"collectd.$osd_servers.$domain.interface.{en,eth,bond}*.if_octets.tx" + }, + { + "refId": "C", + "target": "alias(sumSeries(#A,#B), \"Network load (rx+tx)\")", + "targetFull": "alias(sumSeries(collectd.$osd_servers.$domain.interface.{en,eth,bond}*.if_octets.rx,collectd.$osd_servers.$domain.interface.{en,eth,bond}*.if_octets.tx), \"Network load (rx+tx)\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load - $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Host CPU and Network Load", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "test.lab", + "value": "test.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "test.lab", + "value": "test.lab" + } + ], + "query": "test.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + 
"selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "85", + "value": "85" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,85,90,95,98", + "type": "custom" + }, + { + "allValue": "", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 0, + "includeAll": true, + "label": "OSD Hostname", + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "ceph-1", + "value": "ceph-1" + }, + { + "selected": false, + "text": "ceph-2", + "value": "ceph-2" + }, + { + "selected": false, + "text": "ceph-3", + "value": "ceph-3" + } + ], + "query": "ceph-1,ceph-2,ceph-3", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "85", + "value": "85" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "disk_full_threshold", + "options": [ + { + "selected": true, + "text": "85", + "value": "85" + } + ], + "query": "85", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Backend Storage", + "version": 26 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-03T21:42:28Z", + "createdBy": "admin", + "expires": "0001-01-01T00:00:00Z", + "slug": "ceph-backend-storage", + 
"type": "db", + "updated": "2017-09-12T23:43:51Z", + "updatedBy": "admin", + "version": 26 + } +} \ No newline at end of file diff --git a/dashboards/cephmetrics-graphite/ceph-cluster.json b/dashboards/cephmetrics-graphite/ceph-cluster.json new file mode 100644 index 0000000..eb18733 --- /dev/null +++ b/dashboards/cephmetrics-graphite/ceph-cluster.json @@ -0,0 +1,2250 @@ +{ + "meta": { + "updated": "2017-08-21T04:31:01Z", + "created": "2017-08-03T21:42:28Z", + "canStar": true, + "expires": "0001-01-01T00:00:00Z", + "canSave": true, + "canEdit": true, + "version": 30, + "createdBy": "admin", + "updatedBy": "admin", + "type": "db", + "slug": "ceph-cluster" + }, + "dashboard": { + "style": "dark", + "rows": [ + { + "repeat": null, + "titleSize": "h5", + "repeatIteration": null, + "title": "Cluster Configuration", + "height": "100", + "repeatRowId": null, + "panels": [ + { + "span": 1, + "links": [], + "title": "", + "transparent": true, + "content": "", + "mode": "markdown", + "minSpan": 1, + "type": "text", + "id": 74 + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "links": [], + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "", + "rangeMaps": [ + { + "to": "null", + "from": "null", + "text": "N/A" + } + ], + "nullPointMode": "connected", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "gauge": { + "thresholdMarkers": true, + "thresholdLabels": false, + "minValue": 0, + "maxValue": 100, + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon), \"max\")),\"mons\")", + "tags": [], + "groupBy": [], + "textEditor": true, + "resultFormat": "time_series", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", 
+ "policy": "default", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ] + } + ], + "maxDataPoints": 100, + "mappingType": 1, + "span": 1, + "colorBackground": false, + "title": "MONs", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "id": 73, + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "80%", + "format": "none", + "cacheTimeout": null, + "postfix": "", + "decimals": 0, + "interval": null, + "prefix": "", + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "postfixFontSize": "50%", + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "links": [], + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "", + "rangeMaps": [ + { + "text": "N/A", + "from": "null", + "to": "null" + } + ], + "nullPointMode": "connected", + "prefix": "", + "gauge": { + "thresholdMarkers": true, + "thresholdLabels": false, + "minValue": 0, + "maxValue": 100, + "show": false + }, + "id": 68, + "maxDataPoints": "", + "mappingType": 1, + "span": 1, + "colorBackground": false, + "title": "OSD Hosts", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "textEditor": true, + "target": "maxSeries(consolidateBy(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_hosts, \"max\"))", + "refId": "A" + } + ], + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "80%", + "format": "none", + "cacheTimeout": null, + "postfix": "", + "postfixFontSize": "50%", + "interval": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": 
"Local", + "nullText": null, + "minSpan": 1, + "decimals": 0, + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "colorBackground": false, + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "", + "rangeMaps": [ + { + "text": "N/A", + "from": "null", + "to": "null" + } + ], + "nullPointMode": "connected", + "prefix": "", + "gauge": { + "thresholdMarkers": true, + "thresholdLabels": false, + "minValue": 0, + "maxValue": 100, + "show": false + }, + "id": 72, + "maxDataPoints": 100, + "mappingType": 1, + "span": 1, + "links": [], + "title": "MDS", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_mds_up), \"max\")),\"mds\")", + "tags": [], + "refId": "A", + "textEditor": true, + "resultFormat": "time_series", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "groupBy": [], + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ] + } + ], + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "80%", + "format": "none", + "cacheTimeout": null, + "postfix": "", + "postfixFontSize": "50%", + "interval": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "decimals": 0, + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "links": [], + "valueMaps": [ + { + "text": "0", + "value": "null", + "op": "=" + } + ], + "tableColumn": 
"", + "thresholds": "", + "rangeMaps": [ + { + "text": "N/A", + "from": "null", + "to": "null" + } + ], + "nullPointMode": "connected", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "gauge": { + "thresholdMarkers": true, + "thresholdLabels": false, + "minValue": 0, + "maxValue": 100, + "show": false + }, + "targets": [ + { + "textEditor": true, + "refId": "A", + "target": "countSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put)" + } + ], + "maxDataPoints": "", + "mappingType": 1, + "span": 1, + "colorBackground": false, + "title": "RGW Hosts", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "id": 69, + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "80%", + "format": "none", + "cacheTimeout": null, + "postfix": "", + "postfixFontSize": "50%", + "interval": null, + "prefix": "", + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "decimals": 0, + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "links": [], + "valueMaps": [ + { + "text": "0", + "value": "null", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "", + "rangeMaps": [ + { + "text": "N/A", + "from": "null", + "to": "null" + } + ], + "nullPointMode": "connected", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "gauge": { + "thresholdMarkers": true, + "thresholdLabels": false, + "minValue": 0, + "maxValue": 100, + "show": false + }, + "id": 71, + "maxDataPoints": "", + "mappingType": 1, + "span": 1, + "colorBackground": false, + "title": "iSCSI Hosts", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "textEditor": 
true, + "refId": "A", + "target": "countSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_name.*)" + } + ], + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "80%", + "format": "none", + "cacheTimeout": null, + "postfix": "", + "postfixFontSize": "50%", + "interval": null, + "prefix": "", + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "decimals": 0, + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "links": [], + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "", + "rangeMaps": [ + { + "text": "N/A", + "from": "null", + "to": "null" + } + ], + "nullPointMode": "connected", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "gauge": { + "thresholdMarkers": true, + "show": false, + "minValue": 0, + "maxValue": 100, + "thresholdLabels": false + }, + "id": 67, + "maxDataPoints": 100, + "mappingType": 1, + "span": 2, + "colorBackground": false, + "title": "OSDs", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd), \"max\")),\"total\")", + "tags": [], + "refId": "A", + "textEditor": true, + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "groupBy": [], + "resultFormat": "time_series" + } + ], + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "80%", + "format": "none", + "cacheTimeout": null, + "postfix": "", + "decimals": 0, + "interval": 
null, + "prefix": "", + "datasource": "Local", + "nullText": null, + "minSpan": 2, + "postfixFontSize": "50%", + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "colorBackground": false, + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "", + "rangeMaps": [ + { + "text": "N/A", + "from": "null", + "to": "null" + } + ], + "nullPointMode": "connected", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "gauge": { + "thresholdMarkers": true, + "thresholdLabels": false, + "minValue": 0, + "maxValue": 100, + "show": false + }, + "id": 17, + "maxDataPoints": 100, + "mappingType": 1, + "span": 1, + "links": [], + "title": "Pools", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "target": "maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_pool)", + "tags": [], + "groupBy": [], + "textEditor": true, + "select": [ + [ + { + "type": "field", + "params": [ + "value" + ] + } + ] + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series" + } + ], + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "80%", + "format": "none", + "cacheTimeout": null, + "postfix": "", + "interval": null, + "prefix": "", + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "postfixFontSize": "50%", + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "links": [], + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": 
"", + "rangeMaps": [ + { + "text": "N/A", + "from": "null", + "to": "null" + } + ], + "nullPointMode": "connected", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "gauge": { + "show": false, + "thresholdMarkers": true, + "minValue": 0, + "maxValue": 100, + "thresholdLabels": false + }, + "targets": [ + { + "textEditor": true, + "refId": "A", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_avail), \"max\")),\"freespacel\")" + } + ], + "maxDataPoints": 100, + "mappingType": 1, + "span": 2, + "colorBackground": false, + "title": "Unused Capacity", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "id": 70, + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "80%", + "format": "decbytes", + "cacheTimeout": null, + "postfix": "", + "decimals": 1, + "interval": null, + "prefix": "", + "datasource": null, + "nullText": null, + "minSpan": 2, + "postfixFontSize": "50%", + "colorValue": false + } + ], + "showTitle": true, + "collapse": false + }, + { + "repeat": null, + "titleSize": "h5", + "repeatIteration": null, + "title": "Cluster Flags", + "height": "100", + "repeatRowId": null, + "panels": [ + { + "span": 2, + "links": [], + "title": "", + "id": 66, + "content": "", + "mode": "markdown", + "minSpan": 2, + "type": "text", + "transparent": true + }, + { + "span": 1, + "links": [], + "title": "", + "height": "95", + "content": "

Cluster Flags:

", + "mode": "html", + "minSpan": 1, + "type": "text", + "id": 58, + "transparent": true + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "timeFrom": "1m", + "links": [], + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + }, + { + "text": "ENABLED", + "value": "0", + "op": "=" + }, + { + "text": "ACTIVE", + "value": "1", + "op": "=" + }, + { + "text": "DISABLED", + "value": "2", + "op": "=" + }, + { + "text": "NODATA", + "value": "-1", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "1,2", + "rangeMaps": [ + { + "text": "N/A", + "from": "null", + "to": "null" + } + ], + "nullPointMode": "connected", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "gauge": { + "thresholdMarkers": true, + "minValue": 0, + "maxValue": 100, + "thresholdLabels": false, + "show": false + }, + "id": 57, + "maxDataPoints": "", + "mappingType": 1, + "span": 1, + "colorBackground": true, + "title": "SCRUB", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "textEditor": true, + "refId": "A", + "target": "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.scrub),\"maxSeries\"))" + } + ], + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "40%", + "description": "Scrub activity takes place daily basis across the OSD's and performs object size and attribute checks. 
Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "format": "none", + "cacheTimeout": null, + "height": "95", + "timeShift": null, + "interval": null, + "prefix": "", + "postfix": "", + "hideTimeOverride": true, + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "postfixFontSize": "50%", + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "timeFrom": "1m", + "colorBackground": true, + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + }, + { + "text": "ENABLED", + "value": "0", + "op": "=" + }, + { + "text": "ACTIVE", + "value": "1", + "op": "=" + }, + { + "text": "DISABLED", + "value": "2", + "op": "=" + }, + { + "text": "NODATA", + "value": "-1", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "1,2", + "rangeMaps": [ + { + "text": "N/A", + "from": "null", + "to": "null" + } + ], + "height": "95", + "prefix": "", + "gauge": { + "thresholdMarkers": true, + "thresholdLabels": false, + "minValue": 0, + "maxValue": 100, + "show": false + }, + "targets": [ + { + "textEditor": true, + "refId": "A", + "target": "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.deep_scrub),\"maxSeries\"))" + } + ], + "maxDataPoints": "", + "mappingType": 1, + "span": 1, + "links": [], + "title": "DEEP SCRUB", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "id": 59, + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "40%", + "description": "Deep scrub acts on the objects within placement groups (PGs). Objects are read, and checksum's compared to protect against silent bit-rot events. Although a weekly activity, in large clusters is normal to see deep-scrub active on a daily basis. 
Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "format": "none", + "hideTimeOverride": true, + "nullPointMode": "connected", + "postfix": "", + "interval": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "cacheTimeout": null, + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "postfixFontSize": "50%", + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "timeFrom": "1m", + "links": [], + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + }, + { + "text": "ENABLED", + "value": "0", + "op": "=" + }, + { + "text": "ACTIVE", + "value": "1", + "op": "=" + }, + { + "text": "DISABLED", + "value": "2", + "op": "=" + }, + { + "text": "NODATA", + "value": "-1", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "1,2", + "rangeMaps": [ + { + "text": "N/A", + "from": "null", + "to": "null" + } + ], + "nullPointMode": "connected", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "gauge": { + "show": false, + "thresholdMarkers": true, + "minValue": 0, + "maxValue": 100, + "thresholdLabels": false + }, + "id": 60, + "maxDataPoints": "", + "mappingType": 1, + "span": 1, + "colorBackground": true, + "title": "RECOVERY", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "textEditor": true, + "target": "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.recovery),\"maxSeries\"))", + "refId": "A" + } + ], + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "40%", + "description": "With the recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster 
to honor the pool's protection schema. Under some circumstances, you may use the norecover flag to prevent automatic recovery.", + "format": "none", + "cacheTimeout": null, + "height": "95", + "postfix": "", + "interval": null, + "prefix": "", + "hideTimeOverride": true, + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "postfixFontSize": "50%", + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "timeFrom": "1m", + "links": [], + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + }, + { + "text": "ENABLED", + "value": "0", + "op": "=" + }, + { + "text": "ACTIVE", + "value": "1", + "op": "=" + }, + { + "text": "DISABLED", + "value": "2", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "1,2", + "rangeMaps": [ + { + "to": "null", + "from": "null", + "text": "N/A" + } + ], + "height": "95", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "gauge": { + "thresholdMarkers": true, + "minValue": 0, + "maxValue": 100, + "thresholdLabels": false, + "show": false + }, + "id": 64, + "maxDataPoints": "", + "mappingType": 1, + "span": 1, + "colorBackground": true, + "title": "BACKFILL", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "textEditor": true, + "target": "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.backfill),\"maxSeries\"))", + "refId": "A" + } + ], + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "40%", + "format": "none", + "cacheTimeout": null, + "nullPointMode": "connected", + "postfix": "", + "interval": null, + "prefix": "", + "hideTimeOverride": true, + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "postfixFontSize": "50%", + "colorValue": 
false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "timeFrom": "1m", + "colorBackground": true, + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + }, + { + "text": "ENABLED", + "value": "0", + "op": "=" + }, + { + "text": "ACTIVE", + "value": "1", + "op": "=" + }, + { + "text": "DISABLED", + "value": "2", + "op": "=" + }, + { + "text": "NODATA", + "value": "-1", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "1,2", + "rangeMaps": [ + { + "text": "N/A", + "from": "null", + "to": "null" + } + ], + "height": "95", + "prefix": "", + "gauge": { + "thresholdMarkers": true, + "show": false, + "minValue": 0, + "maxValue": 100, + "thresholdLabels": false + }, + "targets": [ + { + "textEditor": true, + "refId": "A", + "target": "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.rebalance),\"maxSeries\"))" + } + ], + "maxDataPoints": "", + "mappingType": 1, + "span": 1, + "links": [], + "title": "REBALANCE", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "id": 61, + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "40%", + "format": "none", + "hideTimeOverride": true, + "nullPointMode": "connected", + "postfix": "", + "interval": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "cacheTimeout": null, + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "postfixFontSize": "50%", + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "timeFrom": "1m", + "colorBackground": true, + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + }, + { + "text": "ENABLED", + "value": "0", + "op": "=" + }, + 
{ + "text": "ACTIVE", + "value": "1", + "op": "=" + }, + { + "text": "DISABLED", + "value": "2", + "op": "=" + }, + { + "text": "NODATA", + "value": "-1", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "1,2", + "rangeMaps": [ + { + "to": "null", + "from": "null", + "text": "N/A" + } + ], + "height": "95", + "prefix": "", + "gauge": { + "thresholdMarkers": true, + "thresholdLabels": false, + "minValue": 0, + "maxValue": 100, + "show": false + }, + "targets": [ + { + "textEditor": true, + "target": "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.out),\"maxSeries\"))", + "refId": "A" + } + ], + "maxDataPoints": "", + "mappingType": 1, + "span": 1, + "links": [], + "title": "OUT", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "id": 62, + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "40%", + "description": "The OUT flag setting allows the mon's to mark OSD's as out of the configuration when they stop sending hearbeats to the mon's. By marking them OUT, recovery takes place. 
However, for planned maintenance you can set the cluster to noout to disable this behavior.", + "format": "none", + "hideTimeOverride": true, + "nullPointMode": "connected", + "postfix": "", + "interval": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "cacheTimeout": null, + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "postfixFontSize": "50%", + "colorValue": false + }, + { + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "timeFrom": "1m", + "links": [], + "valueMaps": [ + { + "text": "N/A", + "value": "null", + "op": "=" + }, + { + "text": "ENABLED", + "value": "0", + "op": "=" + }, + { + "text": "ACTIVE", + "value": "1", + "op": "=" + }, + { + "text": "DISABLED", + "value": "2", + "op": "=" + }, + { + "text": "NODATA", + "value": "-1", + "op": "=" + } + ], + "tableColumn": "", + "thresholds": "1,2", + "rangeMaps": [ + { + "to": "null", + "from": "null", + "text": "N/A" + } + ], + "height": "95", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "gauge": { + "thresholdMarkers": true, + "thresholdLabels": false, + "minValue": 0, + "maxValue": 100, + "show": false + }, + "id": 63, + "maxDataPoints": "", + "mappingType": 1, + "span": 1, + "colorBackground": true, + "title": "DOWN", + "sparkline": { + "full": false, + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "textEditor": true, + "refId": "A", + "target": "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.down),\"maxSeries\"))" + } + ], + "prefixFontSize": "50%", + "valueName": "current", + "type": "singlestat", + "valueFontSize": "40%", + "description": "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. 
However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states", + "format": "none", + "cacheTimeout": null, + "nullPointMode": "connected", + "postfix": "", + "interval": null, + "prefix": "", + "hideTimeOverride": true, + "datasource": "Local", + "nullText": null, + "minSpan": 1, + "postfixFontSize": "50%", + "colorValue": false + } + ], + "showTitle": false, + "collapse": false + }, + { + "repeat": null, + "titleSize": "h5", + "collapse": true, + "title": "Cluster Capacity", + "height": "250", + "repeatRowId": null, + "panels": [ + { + "bars": false, + "timeFrom": "7d", + "links": [], + "thresholds": [], + "spaceLength": 10, + "nullPointMode": "connected", + "renderer": "flot", + "linewidth": 1, + "steppedLine": false, + "targets": [ + { + "dsType": "influxdb", + "target": "alias(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used), 'Used')", + "tags": [], + "groupBy": [], + "alias": "Used", + "textEditor": true, + "resultFormat": "time_series", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "select": [ + [ + { + "type": "field", + "params": [ + "value" + ] + } + ] + ] + }, + { + "dsType": "influxdb", + "target": "alias(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes), 'Raw')", + "tags": [], + "refId": "A", + "alias": "Raw Capacity", + "textEditor": true, + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes", + "policy": "default", + "groupBy": [], + "resultFormat": "time_series" + } + ], + "fill": 1, + "span": 4, + "title": "Cluster Capacity - Past 7 Days", + "tooltip": { + "sort": 1, + "shared": true, + "value_type": "individual" + }, + "id": 3, + "yaxes": [ + { + "logBase": 1, + "show": true, + "max": null, + "format": 
"decbytes", + "min": "0", + "label": null + }, + { + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "label": null, + "show": false + } + ], + "xaxis": { + "buckets": null, + "show": true, + "values": [], + "mode": "time", + "name": null + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "description": "Shows the Capacity within the cluster over the past 7 days", + "hideTimeOverride": true, + "dashLength": 10, + "stack": false, + "timeShift": null, + "aliasColors": { + "Raw": "#3F6833", + "Used": "#E0752D" + }, + "lines": true, + "legend": { + "avg": false, + "show": true, + "max": false, + "min": false, + "current": true, + "values": true, + "total": false + }, + "points": false, + "datasource": "Local", + "pointradius": 5, + "minSpan": 6 + }, + { + "bars": false, + "timeFrom": "7d", + "links": [], + "thresholds": [], + "spaceLength": 10, + "nullPointMode": "connected", + "renderer": "flot", + "linewidth": 1, + "steppedLine": false, + "targets": [ + { + "dsType": "influxdb", + "target": "groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.percent_used, -2, 'maxSeries')", + "tags": [], + "refId": "A", + "alias": "Used", + "textEditor": true, + "resultFormat": "time_series", + "policy": "default", + "groupBy": [], + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ] + } + ], + "fill": 1, + "span": 4, + "title": "Pool Capacity - Past 7 Days", + "tooltip": { + "sort": 2, + "shared": true, + "value_type": "individual" + }, + "id": 82, + "points": false, + "xaxis": { + "buckets": null, + "values": [], + "mode": "time", + "name": null, + "show": true + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "description": "Shows the Capacity within each pool over the past 7 days", + "hideTimeOverride": true, + "dashLength": 10, + "stack": false, + "timeShift": null, + "aliasColors": { + "Raw": "#3F6833", + "Used": "#E0752D" + 
}, + "lines": true, + "legend": { + "avg": false, + "min": false, + "max": false, + "show": true, + "current": false, + "values": false, + "alignAsTable": false, + "total": false + }, + "yaxes": [ + { + "logBase": 1, + "format": "percent", + "max": "100", + "min": "0", + "label": "", + "show": true + }, + { + "logBase": 1, + "format": "short", + "max": null, + "min": null, + "label": null, + "show": false + } + ], + "datasource": "Local", + "pointradius": 5, + "minSpan": 6, + "decimals": 2 + }, + { + "bars": false, + "timeFrom": "7d", + "links": [], + "thresholds": [], + "spaceLength": 10, + "nullPointMode": "connected", + "renderer": "flot", + "linewidth": 1, + "steppedLine": false, + "targets": [ + { + "dsType": "influxdb", + "target": "alias(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_object), 'RADOS Objects')", + "tags": [], + "groupBy": [], + "alias": "Used", + "textEditor": true, + "resultFormat": "time_series", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ] + } + ], + "fill": 1, + "span": 4, + "title": "RADOS Object History - Past 7 Days", + "tooltip": { + "sort": 1, + "shared": true, + "value_type": "individual" + }, + "id": 65, + "yaxes": [ + { + "logBase": 1, + "min": "0", + "max": null, + "format": "short", + "label": "RADOS Object Count", + "show": true + }, + { + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "show": false, + "label": null + } + ], + "xaxis": { + "mode": "time", + "buckets": null, + "values": [], + "name": null, + "show": true + }, + "seriesOverrides": [], + "percentage": false, + "type": "graph", + "dashes": false, + "description": "Shows the Capacity within the cluster over the past 7 days", + "hideTimeOverride": true, + "dashLength": 10, + "stack": false, + "timeShift": null, + "aliasColors": { + "Raw": "#3F6833", + "Used": 
"#E0752D" + }, + "lines": true, + "legend": { + "total": false, + "min": false, + "max": false, + "show": true, + "current": true, + "values": true, + "avg": false + }, + "points": false, + "datasource": "Local", + "pointradius": 5, + "minSpan": 6, + "decimals": 2 + } + ], + "showTitle": true, + "repeatIteration": null + }, + { + "repeat": null, + "titleSize": "h5", + "repeatIteration": null, + "title": "Ceph Version Configuration", + "height": 250, + "repeatRowId": null, + "panels": [ + { + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "type": "date", + "pattern": "Time" + }, + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "colorMode": null, + "thresholds": [], + "alias": "Version", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 1, + "pattern": "Current", + "type": "number", + "unit": "short" + }, + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Metric", + "thresholds": [], + "alias": "Host", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "colorMode": null, + "type": "number", + "unit": "short" + }, + { + "pattern": "/.*/", + "thresholds": [], + "alias": "", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "colorMode": null, + "type": "number", + "unit": "short" + } + ], + "sort": { + "col": 0, + "desc": true + }, + "timeFrom": "2m", + "span": 2, + "pageSize": 10, + "links": [], + "title": "MON Versions", + "timeShift": null, + "transform": "timeseries_aggregations", + "showHeader": true, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.ceph_version, 1)" + } + ], + "hideTimeOverride": true, + "fontSize": "100%", + "id": 75, + "minSpan": 2, + "type": "table", + "scroll": true, + "columns": [ + { + "text": "Current", + "value": "current" + } + ] + 
}, + { + "span": 1, + "links": [], + "title": "", + "transparent": true, + "content": "", + "mode": "markdown", + "minSpan": 1, + "type": "text", + "id": 79 + }, + { + "styles": [ + { + "pattern": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "type": "date", + "alias": "Time" + }, + { + "type": "number", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "colorMode": null, + "thresholds": [], + "alias": "Version", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern": "Current", + "decimals": 1, + "unit": "short" + }, + { + "type": "number", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Metric", + "thresholds": [], + "alias": "Host", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "colorMode": null, + "decimals": 2, + "unit": "short" + }, + { + "pattern": "/.*/", + "thresholds": [], + "alias": "", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "colorMode": null, + "type": "number", + "unit": "short" + } + ], + "sort": { + "col": 0, + "desc": true + }, + "timeFrom": "2m", + "span": 2, + "pageSize": 10, + "links": [], + "title": "OSD Versions", + "timeShift": null, + "transform": "timeseries_aggregations", + "showHeader": true, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.ceph_version, 1)" + } + ], + "hideTimeOverride": true, + "fontSize": "100%", + "id": 76, + "minSpan": 2, + "type": "table", + "scroll": true, + "columns": [ + { + "text": "Current", + "value": "current" + } + ] + }, + { + "span": 1, + "links": [], + "title": "", + "transparent": true, + "content": "", + "mode": "markdown", + "minSpan": 1, + "type": "text", + "id": 80 + }, + { + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "type": "date", + "pattern": "Time" + }, + { + "type": "number", + "dateFormat": 
"YYYY-MM-DD HH:mm:ss", + "pattern": "Current", + "thresholds": [], + "alias": "Version", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "colorMode": null, + "decimals": 1, + "unit": "short" + }, + { + "type": "number", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Metric", + "thresholds": [], + "alias": "Host", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "colorMode": null, + "decimals": 2, + "unit": "short" + }, + { + "type": "number", + "pattern": "/.*/", + "thresholds": [], + "alias": "", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "colorMode": null, + "decimals": 2, + "unit": "short" + } + ], + "sort": { + "col": 0, + "desc": true + }, + "timeFrom": "2m", + "span": 2, + "pageSize": 10, + "links": [], + "title": "RGW Versions", + "transform": "timeseries_aggregations", + "showHeader": true, + "targets": [ + { + "textEditor": true, + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.rgw.ceph_version, 1)", + "refId": "A" + } + ], + "hideTimeOverride": true, + "fontSize": "100%", + "id": 77, + "minSpan": 2, + "type": "table", + "scroll": true, + "columns": [ + { + "text": "Current", + "value": "current" + } + ] + }, + { + "span": 1, + "links": [], + "title": "", + "id": 81, + "content": "", + "mode": "markdown", + "minSpan": 1, + "type": "text", + "transparent": true + }, + { + "styles": [ + { + "pattern": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "type": "date", + "alias": "Time" + }, + { + "type": "number", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Current", + "thresholds": [], + "alias": "Version", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "colorMode": null, + "decimals": 1, + "unit": "short" + }, + { + "type": "number", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + 
"colorMode": null, + "thresholds": [], + "alias": "Host", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern": "Metric", + "decimals": 2, + "unit": "short" + }, + { + "pattern": "/.*/", + "thresholds": [], + "alias": "", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "colorMode": null, + "type": "number", + "unit": "short" + } + ], + "sort": { + "col": 0, + "desc": true + }, + "timeFrom": "2m", + "span": 2, + "links": [], + "pageSize": 10, + "title": "iSCSI Versions", + "scroll": true, + "transform": "timeseries_aggregations", + "showHeader": true, + "id": 78, + "hideTimeOverride": true, + "fontSize": "100%", + "minSpan": 2, + "type": "table", + "targets": [ + { + "textEditor": true, + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.ceph_version, 1)" + } + ], + "columns": [ + { + "text": "Current", + "value": "current" + } + ] + } + ], + "showTitle": true, + "collapse": false + } + ], + "templating": { + "list": [ + { + "regex": "", + "current": { + "text": "ceph", + "selected": true, + "value": "ceph" + }, + "multi": false, + "hide": 2, + "name": "cluster_name", + "tags": [], + "allValue": null, + "type": "query", + "refresh": 1, + "label": null, + "tagValuesQuery": "", + "sort": 0, + "datasource": "Local", + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "useTags": false, + "tagsQuery": "", + "options": [], + "includeAll": false + }, + { + "multi": false, + "hide": 2, + "name": "domain", + "allValue": null, + "label": null, + "current": { + "text": "test.lab", + "selected": true, + "value": "test.lab" + }, + "query": "test.lab", + "type": "custom", + "options": [ + { + "text": "test.lab", + "selected": true, + "value": "test.lab" + } + ], + "includeAll": false + } + ] + }, + "links": [ + { + "keepTime": true, + "tags": [ + "overview" + ], + "title": "Shortcuts", + 
"includeVars": true, + "targetBlank": true, + "asDropdown": true, + "type": "dashboards", + "icon": "external link" + } + ], + "tags": [ + "overview" + ], + "graphTooltip": 0, + "hideControls": true, + "title": "Ceph Cluster", + "editable": false, + "refresh": "10s", + "id": 25, + "gnetId": null, + "timepicker": { + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ], + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "version": 30, + "time": { + "to": "now", + "from": "now-1h" + }, + "timezone": "browser", + "schemaVersion": 14, + "annotations": { + "list": [] + } + } +} \ No newline at end of file diff --git a/dashboards/cephmetrics-graphite/ceph-health.json b/dashboards/cephmetrics-graphite/ceph-health.json new file mode 100644 index 0000000..0a53200 --- /dev/null +++ b/dashboards/cephmetrics-graphite/ceph-health.json @@ -0,0 +1,1979 @@ +{ + "dashboard" : { + "templating" : { + "list" : [ + { + "current" : { + "value" : "ceph", + "text" : "ceph", + "selected" : true + }, + "refresh" : 1, + "multi" : false, + "sort" : 0, + "regex" : "", + "hide" : 2, + "includeAll" : false, + "tagValuesQuery" : "", + "tagsQuery" : "", + "name" : "cluster_name", + "datasource" : "Local", + "tags" : [], + "allValue" : null, + "useTags" : false, + "options" : [], + "query" : "collectd.*.$domain.cephmetrics.gauge.*", + "type" : "query", + "label" : null + }, + { + "allValue" : null, + "name" : "domain", + "hide" : 2, + "includeAll" : false, + "current" : { + "selected" : true, + "text" : "test.lab", + "value" : "test.lab" + }, + "multi" : false, + "type" : "custom", + "label" : null, + "query" : "test.lab", + "options" : [ + { + "value" : "test.lab", + "selected" : true, + "text" : "test.lab" + } + ] + }, + { + "current" : { + "value" : "$__all", + "selected" : true, + "text" : "All" + }, + "includeAll" : true, + "hide" : 2, + "allValue" : null, + "name" : "osd_servers", + 
"options" : [ + { + "selected" : true, + "text" : "All", + "value" : "$__all" + }, + { + "text" : "ceph-1", + "selected" : false, + "value" : "ceph-1" + }, + { + "selected" : false, + "text" : "ceph-2", + "value" : "ceph-2" + }, + { + "value" : "ceph-3", + "text" : "ceph-3", + "selected" : false + } + ], + "multi" : true, + "label" : null, + "type" : "custom", + "query" : "ceph-1,ceph-2,ceph-3" + }, + { + "options" : [ + { + "value" : "85", + "selected" : true, + "text" : "85" + } + ], + "multi" : false, + "query" : "85", + "type" : "custom", + "label" : null, + "current" : { + "text" : "85", + "selected" : true, + "value" : "85" + }, + "hide" : 2, + "includeAll" : false, + "allValue" : null, + "name" : "disk_full_threshold" + } + ] + }, + "rows" : [ + { + "height" : "70", + "repeat" : null, + "collapse" : false, + "repeatRowId" : null, + "titleSize" : "h6", + "repeatIteration" : null, + "panels" : [ + { + "timeShift" : null, + "timeFrom" : "1m", + "prefixFontSize" : "50%", + "gauge" : { + "thresholdMarkers" : true, + "thresholdLabels" : false, + "show" : false, + "minValue" : 0, + "maxValue" : 100 + }, + "mappingTypes" : [ + { + "value" : 1, + "name" : "value to text" + }, + { + "name" : "range to text", + "value" : 2 + } + ], + "height" : "70", + "postfixFontSize" : "50%", + "hideTimeOverride" : true, + "maxDataPoints" : "", + "rangeMaps" : [ + { + "text" : "HEALTH OK", + "from" : "0", + "to" : "1" + }, + { + "to" : "4", + "text" : "HEALTH WARNING", + "from" : "1" + }, + { + "to" : "99", + "text" : "HEALTH ERROR", + "from" : "5" + }, + { + "text" : "NODATA", + "from" : "-10", + "to" : "0" + } + ], + "format" : "none", + "valueMaps" : [ + { + "text" : "OK", + "op" : "=", + "value" : "0" + }, + { + "text" : "WARN", + "op" : "=", + "value" : "4" + }, + { + "text" : "ERROR", + "op" : "=", + "value" : "8" + } + ], + "type" : "singlestat", + "cacheTimeout" : null, + "title" : "", + "targets" : [ + { + "target" : 
"consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.health),\"max\")", + "textEditor" : true, + "refId" : "A" + } + ], + "prefix" : "", + "valueName" : "current", + "description" : "Shows the overall health of the ceph cluster. To see specific health messages hover over the annotation in the health chart below.", + "datasource" : "Local", + "colorValue" : false, + "colorBackground" : true, + "links" : [], + "postfix" : "", + "nullText" : null, + "minSpan" : 1, + "nullPointMode" : "connected", + "colors" : [ + "rgba(1, 167, 1, 1)", + "rgba(255,165,0, 1)", + "rgba(255, 0, 0, 1)" + ], + "mappingType" : 2, + "interval" : null, + "id" : 76, + "span" : 12, + "valueFontSize" : "50%", + "thresholds" : "1,5", + "sparkline" : { + "lineColor" : "rgb(31, 120, 193)", + "show" : false, + "fillColor" : "rgba(31, 118, 189, 0.18)", + "full" : false + }, + "tableColumn" : "" + } + ], + "title" : "Dashboard Row", + "showTitle" : false + }, + { + "height" : "350", + "repeat" : null, + "collapse" : false, + "repeatRowId" : null, + "titleSize" : "h5", + "repeatIteration" : null, + "panels" : [ + { + "xaxis" : { + "mode" : "time", + "buckets" : null, + "show" : true, + "values" : [], + "name" : null + }, + "pointradius" : 5, + "thresholds" : [ + { + "value" : 3, + "fillColor" : "rgba(222, 226, 0, 0.47)", + "lineColor" : "rgb(247, 172, 0)", + "line" : true, + "fill" : false, + "colorMode" : "custom", + "op" : "gt" + }, + { + "fillColor" : "rgba(246, 3, 3, 0.5)", + "value" : 7, + "fill" : false, + "line" : true, + "op" : "gt", + "colorMode" : "custom", + "lineColor" : "rgb(203, 0, 0)" + } + ], + "span" : 12, + "dashLength" : 10, + "id" : 55, + "description" : "The chart plots the clusters health, over time. Health is depicted as a integer; 0, 4 or 8 where 0 is OK, 4 is WARN and 8 represents an ERROR state. Horizontal thresholds (drawn in yellow and red) indicate when the cluster's health transitions to 'warning' or 'error'. 
Annotations describing the details of the clusters health may also be shown if the \"EventURL\" setting is defined to the collectd plugin on the mon hosts.", + "datasource" : "Local", + "bars" : false, + "fill" : 1, + "yaxes" : [ + { + "logBase" : 1, + "format" : "short", + "min" : "-1", + "show" : false, + "label" : "", + "max" : "10" + }, + { + "format" : "short", + "logBase" : 1, + "max" : null, + "label" : null, + "show" : false, + "min" : null + } + ], + "nullPointMode" : "null", + "minSpan" : 3, + "dashes" : false, + "links" : [], + "aliasColors" : { + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.health" : "#DEDAF7", + "Ceph Health (0:OK, 4:Warning,8:Error)" : "#DEDAF7", + "Ceph Health" : "#3F6833" + }, + "percentage" : false, + "lines" : true, + "points" : false, + "tooltip" : { + "value_type" : "individual", + "shared" : false, + "sort" : 1 + }, + "steppedLine" : true, + "seriesOverrides" : [], + "title" : "Health History - Last 3 days", + "targets" : [ + { + "target" : "alias(maxSeries(summarize(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.health,\"5min\",\"max\")), 'Ceph Health')", + "textEditor" : true, + "refId" : "A" + } + ], + "hideTimeOverride" : true, + "stack" : false, + "height" : "350", + "spaceLength" : 10, + "linewidth" : 2, + "timeFrom" : "3d", + "renderer" : "flot", + "timeShift" : null, + "type" : "graph", + "maxDataPoints" : "", + "legend" : { + "total" : false, + "current" : false, + "avg" : false, + "min" : false, + "values" : false, + "max" : false, + "show" : true + } + } + ], + "title" : "Cluster Health", + "showTitle" : true + }, + { + "collapse" : false, + "repeatIteration" : null, + "titleSize" : "h5", + "panels" : [ + { + "content" : "", + "transparent" : true, + "minSpan" : 1, + "type" : "text", + "links" : [], + "span" : 1, + "title" : "", + "id" : 69, + "mode" : "html" + }, + { + "pageSize" : null, + "sort" : { + "desc" : false, + "col" : null + }, + "title" : "Monitor Status", + "span" : 3, + "targets" : 
[ + { + "target" : "aliasByNode(groupByNode(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.mon_status.*),-1),-1,\"maxSeries\"),-1)", + "textEditor" : true, + "refId" : "B" + } + ], + "id" : 56, + "columns" : [ + { + "text" : "Current", + "value" : "current" + } + ], + "showHeader" : true, + "scroll" : true, + "type" : "table", + "minSpan" : 2, + "links" : [], + "transform" : "timeseries_aggregations", + "fontSize" : "100%", + "maxDataPoints" : "", + "hideTimeOverride" : true, + "timeFrom" : "1m", + "styles" : [ + { + "pattern" : "Time", + "alias" : "Time", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "type" : "date" + }, + { + "sanitize" : false, + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern" : "Metric", + "type" : "number", + "thresholds" : [], + "unit" : "short", + "colorMode" : null, + "decimals" : 2, + "alias" : "Hostname", + "dateFormat" : "YYYY-MM-DD HH:mm:ss" + }, + { + "thresholds" : [ + "0", + "3", + "7" + ], + "type" : "number", + "alias" : "Status", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "unit" : "short", + "colorMode" : "row", + "decimals" : 0, + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(0, 169, 3, 0.89)", + "rgba(249, 190, 0, 0.97)" + ], + "pattern" : "Current" + }, + { + "type" : "number", + "thresholds" : [], + "unit" : "short", + "decimals" : 2, + "colorMode" : null, + "alias" : "", + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern" : "/.*/" + } + ], + "timeShift" : null + }, + { + "content" : "", + "transparent" : true, + "minSpan" : 2, + "type" : "text", + "links" : [], + "span" : 2, + "title" : "", + "id" : 68, + "mode" : "html" + }, + { + "transparent" : true, + "content" : "

Cluster Flags:

", + "minSpan" : 1, + "type" : "text", + "id" : 58, + "mode" : "html", + "links" : [], + "span" : 1, + "title" : "" + }, + { + "gauge" : { + "show" : false, + "minValue" : 0, + "maxValue" : 100, + "thresholdLabels" : false, + "thresholdMarkers" : true + }, + "mappingTypes" : [ + { + "name" : "value to text", + "value" : 1 + }, + { + "value" : 2, + "name" : "range to text" + } + ], + "height" : "95", + "postfixFontSize" : "50%", + "hideTimeOverride" : true, + "timeFrom" : "1m", + "prefixFontSize" : "50%", + "valueMaps" : [ + { + "text" : "N/A", + "op" : "=", + "value" : "null" + }, + { + "value" : "0", + "text" : "ENABLED", + "op" : "=" + }, + { + "value" : "1", + "op" : "=", + "text" : "ACTIVE" + }, + { + "text" : "DISABLED", + "op" : "=", + "value" : "2" + } + ], + "type" : "singlestat", + "maxDataPoints" : "", + "rangeMaps" : [ + { + "text" : "N/A", + "from" : "null", + "to" : "null" + } + ], + "format" : "none", + "prefix" : "", + "cacheTimeout" : null, + "title" : "SCRUB", + "targets" : [ + { + "refId" : "A", + "textEditor" : true, + "target" : "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.scrub),\"maxSeries\"))" + } + ], + "description" : "Scrub activity takes place daily basis across the OSD's and performs object size and attribute checks. 
Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "datasource" : "Local", + "colorValue" : false, + "colorBackground" : true, + "valueName" : "current", + "nullText" : null, + "minSpan" : 1, + "nullPointMode" : "connected", + "links" : [], + "postfix" : "", + "colors" : [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "mappingType" : 1, + "interval" : null, + "thresholds" : "1,2", + "sparkline" : { + "lineColor" : "rgb(31, 120, 193)", + "fillColor" : "rgba(31, 118, 189, 0.18)", + "show" : false, + "full" : false + }, + "tableColumn" : "", + "id" : 57, + "span" : 1, + "valueFontSize" : "40%" + }, + { + "thresholds" : "1,2", + "tableColumn" : "", + "sparkline" : { + "full" : false, + "fillColor" : "rgba(31, 118, 189, 0.18)", + "show" : false, + "lineColor" : "rgb(31, 120, 193)" + }, + "id" : 59, + "valueFontSize" : "40%", + "span" : 1, + "colors" : [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "interval" : null, + "mappingType" : 1, + "nullText" : null, + "nullPointMode" : "connected", + "minSpan" : 1, + "links" : [], + "postfix" : "", + "description" : "Deep scrub acts on the objects within placement groups (PGs). Objects are read, and checksum's compared to protect against silent bit-rot events. Although a weekly activity, in large clusters is normal to see deep-scrub active on a daily basis. 
Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "datasource" : "Local", + "colorBackground" : true, + "colorValue" : false, + "valueName" : "current", + "prefix" : "", + "cacheTimeout" : null, + "title" : "DEEP SCRUB", + "targets" : [ + { + "target" : "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.deep_scrub),\"maxSeries\"))", + "textEditor" : true, + "refId" : "A" + } + ], + "valueMaps" : [ + { + "value" : "null", + "text" : "N/A", + "op" : "=" + }, + { + "value" : "0", + "text" : "ENABLED", + "op" : "=" + }, + { + "value" : "1", + "text" : "ACTIVE", + "op" : "=" + }, + { + "value" : "2", + "op" : "=", + "text" : "DISABLED" + } + ], + "type" : "singlestat", + "rangeMaps" : [ + { + "to" : "null", + "from" : "null", + "text" : "N/A" + } + ], + "maxDataPoints" : "", + "format" : "none", + "height" : "95", + "mappingTypes" : [ + { + "name" : "value to text", + "value" : 1 + }, + { + "name" : "range to text", + "value" : 2 + } + ], + "gauge" : { + "maxValue" : 100, + "show" : false, + "minValue" : 0, + "thresholdMarkers" : true, + "thresholdLabels" : false + }, + "postfixFontSize" : "50%", + "hideTimeOverride" : true, + "prefixFontSize" : "50%", + "timeFrom" : "1m" + }, + { + "colors" : [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "mappingType" : 1, + "interval" : null, + "id" : 60, + "valueFontSize" : "40%", + "span" : 1, + "thresholds" : "1,2", + "sparkline" : { + "lineColor" : "rgb(31, 120, 193)", + "show" : false, + "fillColor" : "rgba(31, 118, 189, 0.18)", + "full" : false + }, + "tableColumn" : "", + "valueName" : "current", + "datasource" : "Local", + "description" : "With the recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster to honor the pool's protection schema. 
Under some circumstances, you may use the norecover flag to prevent automatic recovery.", + "colorValue" : false, + "colorBackground" : true, + "postfix" : "", + "links" : [], + "nullText" : null, + "nullPointMode" : "connected", + "minSpan" : 1, + "cacheTimeout" : null, + "targets" : [ + { + "target" : "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.recovery),\"maxSeries\"))", + "textEditor" : true, + "refId" : "A" + } + ], + "title" : "RECOVERY", + "prefix" : "", + "timeFrom" : "1m", + "prefixFontSize" : "50%", + "gauge" : { + "minValue" : 0, + "show" : false, + "maxValue" : 100, + "thresholdMarkers" : true, + "thresholdLabels" : false + }, + "mappingTypes" : [ + { + "value" : 1, + "name" : "value to text" + }, + { + "name" : "range to text", + "value" : 2 + } + ], + "height" : "95", + "postfixFontSize" : "50%", + "hideTimeOverride" : true, + "maxDataPoints" : "", + "rangeMaps" : [ + { + "to" : "null", + "from" : "null", + "text" : "N/A" + } + ], + "format" : "none", + "type" : "singlestat", + "valueMaps" : [ + { + "value" : "null", + "op" : "=", + "text" : "N/A" + }, + { + "op" : "=", + "text" : "ENABLED", + "value" : "0" + }, + { + "op" : "=", + "text" : "ACTIVE", + "value" : "1" + }, + { + "value" : "2", + "op" : "=", + "text" : "DISABLED" + } + ] + }, + { + "cacheTimeout" : null, + "targets" : [ + { + "refId" : "A", + "textEditor" : true, + "target" : "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.backfill),\"maxSeries\"))" + } + ], + "title" : "BACKFILL", + "prefix" : "", + "maxDataPoints" : "", + "rangeMaps" : [ + { + "to" : "null", + "from" : "null", + "text" : "N/A" + } + ], + "format" : "none", + "type" : "singlestat", + "valueMaps" : [ + { + "op" : "=", + "text" : "N/A", + "value" : "null" + }, + { + "op" : "=", + "text" : "ENABLED", + "value" : "0" + }, + { + "text" : "ACTIVE", + "op" : "=", + "value" : "1" + }, + { + "value" : "2", + "op" : 
"=", + "text" : "DISABLED" + } + ], + "timeFrom" : "1m", + "prefixFontSize" : "50%", + "gauge" : { + "thresholdMarkers" : true, + "thresholdLabels" : false, + "minValue" : 0, + "show" : false, + "maxValue" : 100 + }, + "height" : "95", + "mappingTypes" : [ + { + "value" : 1, + "name" : "value to text" + }, + { + "name" : "range to text", + "value" : 2 + } + ], + "postfixFontSize" : "50%", + "hideTimeOverride" : true, + "id" : 64, + "span" : 1, + "valueFontSize" : "40%", + "thresholds" : "1,2", + "sparkline" : { + "lineColor" : "rgb(31, 120, 193)", + "show" : false, + "fillColor" : "rgba(31, 118, 189, 0.18)", + "full" : false + }, + "tableColumn" : "", + "colors" : [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "mappingType" : 1, + "interval" : null, + "postfix" : "", + "links" : [], + "nullText" : null, + "nullPointMode" : "connected", + "minSpan" : 1, + "valueName" : "current", + "datasource" : "Local", + "colorValue" : false, + "colorBackground" : true + }, + { + "prefix" : "", + "targets" : [ + { + "refId" : "A", + "target" : "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.rebalance),\"maxSeries\"))", + "textEditor" : true + } + ], + "title" : "REBALANCE", + "cacheTimeout" : null, + "postfixFontSize" : "50%", + "hideTimeOverride" : true, + "gauge" : { + "minValue" : 0, + "show" : false, + "maxValue" : 100, + "thresholdMarkers" : true, + "thresholdLabels" : false + }, + "height" : "95", + "mappingTypes" : [ + { + "name" : "value to text", + "value" : 1 + }, + { + "value" : 2, + "name" : "range to text" + } + ], + "timeFrom" : "1m", + "prefixFontSize" : "50%", + "valueMaps" : [ + { + "value" : "null", + "op" : "=", + "text" : "N/A" + }, + { + "value" : "0", + "text" : "ENABLED", + "op" : "=" + }, + { + "op" : "=", + "text" : "ACTIVE", + "value" : "1" + }, + { + "text" : "DISABLED", + "op" : "=", + "value" : "2" + } + ], + "type" : "singlestat", + "format" : 
"none", + "maxDataPoints" : "", + "rangeMaps" : [ + { + "text" : "N/A", + "from" : "null", + "to" : "null" + } + ], + "mappingType" : 1, + "interval" : null, + "colors" : [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "sparkline" : { + "show" : false, + "fillColor" : "rgba(31, 118, 189, 0.18)", + "full" : false, + "lineColor" : "rgb(31, 120, 193)" + }, + "tableColumn" : "", + "thresholds" : "1,2", + "valueFontSize" : "40%", + "span" : 1, + "id" : 61, + "colorValue" : false, + "colorBackground" : true, + "datasource" : "Local", + "valueName" : "current", + "minSpan" : 1, + "nullPointMode" : "connected", + "nullText" : null, + "links" : [], + "postfix" : "" + }, + { + "prefixFontSize" : "50%", + "timeFrom" : "1m", + "timeShift" : null, + "postfixFontSize" : "50%", + "hideTimeOverride" : true, + "height" : "95", + "mappingTypes" : [ + { + "name" : "value to text", + "value" : 1 + }, + { + "value" : 2, + "name" : "range to text" + } + ], + "gauge" : { + "thresholdMarkers" : true, + "thresholdLabels" : false, + "maxValue" : 100, + "minValue" : 0, + "show" : false + }, + "format" : "none", + "rangeMaps" : [ + { + "to" : "null", + "text" : "N/A", + "from" : "null" + } + ], + "maxDataPoints" : "", + "type" : "singlestat", + "valueMaps" : [ + { + "text" : "N/A", + "op" : "=", + "value" : "null" + }, + { + "text" : "ENABLED", + "op" : "=", + "value" : "0" + }, + { + "value" : "1", + "text" : "ACTIVE", + "op" : "=" + }, + { + "text" : "DISABLED", + "op" : "=", + "value" : "2" + }, + { + "value" : "-1", + "text" : "NODATA", + "op" : "=" + } + ], + "title" : "OUT", + "targets" : [ + { + "refId" : "A", + "target" : "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.out),\"maxSeries\"))", + "textEditor" : true + } + ], + "cacheTimeout" : null, + "prefix" : "", + "valueName" : "current", + "colorBackground" : true, + "colorValue" : false, + "description" : "The OUT flag setting 
allows the mon's to mark OSD's as out of the configuration when they stop sending hearbeats to the mon's. By marking them OUT, recovery takes place. However, for planned maintenance you can set the cluster to noout to disable this behavior.", + "datasource" : "Local", + "postfix" : "", + "links" : [], + "nullPointMode" : "connected", + "minSpan" : 1, + "nullText" : null, + "interval" : null, + "mappingType" : 1, + "colors" : [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "span" : 1, + "valueFontSize" : "40%", + "id" : 62, + "tableColumn" : "", + "sparkline" : { + "lineColor" : "rgb(31, 120, 193)", + "full" : false, + "show" : false, + "fillColor" : "rgba(31, 118, 189, 0.18)" + }, + "thresholds" : "1,2" + }, + { + "sparkline" : { + "full" : false, + "fillColor" : "rgba(31, 118, 189, 0.18)", + "show" : false, + "lineColor" : "rgb(31, 120, 193)" + }, + "tableColumn" : "", + "thresholds" : "1,2", + "span" : 1, + "valueFontSize" : "40%", + "id" : 63, + "mappingType" : 1, + "interval" : null, + "colors" : [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "minSpan" : 1, + "nullPointMode" : "connected", + "nullText" : null, + "postfix" : "", + "links" : [], + "colorValue" : false, + "colorBackground" : true, + "description" : "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. 
However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states", + "datasource" : "Local", + "valueName" : "current", + "prefix" : "", + "targets" : [ + { + "refId" : "A", + "target" : "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.down),\"maxSeries\"))", + "textEditor" : true + } + ], + "title" : "DOWN", + "cacheTimeout" : null, + "valueMaps" : [ + { + "value" : "null", + "text" : "N/A", + "op" : "=" + }, + { + "value" : "0", + "text" : "ENABLED", + "op" : "=" + }, + { + "value" : "1", + "op" : "=", + "text" : "ACTIVE" + }, + { + "text" : "DISABLED", + "op" : "=", + "value" : "2" + } + ], + "type" : "singlestat", + "format" : "none", + "maxDataPoints" : "", + "rangeMaps" : [ + { + "from" : "null", + "text" : "N/A", + "to" : "null" + } + ], + "postfixFontSize" : "50%", + "hideTimeOverride" : true, + "gauge" : { + "thresholdLabels" : false, + "thresholdMarkers" : true, + "maxValue" : 100, + "minValue" : 0, + "show" : false + }, + "height" : "95", + "mappingTypes" : [ + { + "name" : "value to text", + "value" : 1 + }, + { + "value" : 2, + "name" : "range to text" + } + ], + "timeFrom" : "1m", + "prefixFontSize" : "50%", + "timeShift" : null + } + ], + "repeatRowId" : null, + "title" : "Cluster State", + "showTitle" : true, + "height" : "200", + "repeat" : null + }, + { + "repeat" : null, + "height" : 250, + "collapse" : false, + "showTitle" : true, + "title" : "Storage Information", + "repeatRowId" : null, + "panels" : [ + { + "scroll" : true, + "showHeader" : true, + "styles" : [ + { + "pattern" : "Time", + "alias" : "Time", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "type" : "date" + }, + { + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern" : "Current", + "thresholds" : [], + "type" : "hidden", + "alias" : "% Full", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "unit" 
: "none", + "decimals" : 0, + "colorMode" : null + }, + { + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern" : "Metric", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "alias" : "Host.OSD Id", + "unit" : "short", + "decimals" : 2, + "colorMode" : null, + "thresholds" : [], + "type" : "number" + }, + { + "pattern" : "/.*/", + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "alias" : "", + "colorMode" : null, + "decimals" : 2, + "unit" : "short", + "thresholds" : [], + "type" : "number" + } + ], + "columns" : [ + { + "value" : "current", + "text" : "Current" + } + ], + "transform" : "timeseries_aggregations", + "id" : 75, + "fontSize" : "100%", + "title" : "Disks Near Full", + "span" : 1, + "targets" : [ + { + "refId" : "A", + "textEditor" : true, + "target" : "aliasByNode(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.osd_percent_used),0),$disk_full_threshold),1,-2)" + } + ], + "links" : [], + "sort" : { + "desc" : true, + "col" : 0 + }, + "type" : "table", + "pageSize" : null, + "minSpan" : 1 + }, + { + "scroll" : true, + "showHeader" : true, + "columns" : [ + { + "text" : "Current", + "value" : "current" + } + ], + "id" : 71, + "title" : "OSD's Down", + "span" : 1, + "targets" : [ + { + "target" : "currentBelow(groupByNode(transformNull(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_state.*.up,99),-2,\"maxSeries\"),0.5)", + "textEditor" : true, + "refId" : "B", + "hide" : false + } + ], + "sort" : { + "col" : null, + "desc" : false + }, + "pageSize" : 100, + "timeShift" : null, + "styles" : [ + { + "type" : "number", + "thresholds" : [ + "" + ], + "decimals" : 0, + "colorMode" : null, + "unit" : "short", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "alias" : "OSD Id", + "sanitize" : false, + "pattern" : "Metric", + "colors" : [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + 
"rgba(245, 54, 54, 0.9)" + ] + }, + { + "pattern" : "Current", + "colors" : [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "thresholds" : [ + "0" + ], + "type" : "hidden", + "alias" : "Status", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "colorMode" : "cell", + "decimals" : 0, + "unit" : "short" + } + ], + "timeFrom" : "1m", + "hideTimeOverride" : true, + "fontSize" : "100%", + "transform" : "timeseries_aggregations", + "links" : [], + "minSpan" : 1, + "type" : "table" + }, + { + "fontSize" : "100%", + "transform" : "timeseries_aggregations", + "links" : [], + "minSpan" : 1, + "type" : "table", + "timeShift" : null, + "styles" : [ + { + "pattern" : "Time", + "alias" : "Time", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "type" : "date" + }, + { + "pattern" : "Current", + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "colorMode" : null, + "decimals" : 0, + "unit" : "none", + "alias" : "", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "type" : "hidden", + "thresholds" : [] + }, + { + "type" : "number", + "thresholds" : [], + "unit" : "short", + "decimals" : 2, + "colorMode" : null, + "alias" : "OSD Id", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern" : "Metric" + }, + { + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern" : "/.*/", + "thresholds" : [], + "type" : "number", + "alias" : "", + "unit" : "short", + "colorMode" : null, + "decimals" : 2 + } + ], + "timeFrom" : "5m", + "hideTimeOverride" : true, + "id" : 74, + "title" : "OSDs Out", + "targets" : [ + { + "hide" : false, + "refId" : "B", + "target" : "currentBelow(groupByNode(transformNull(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_state.*.in,99),-2,\"minSeries\"),0.5)", + "textEditor" : true + } + ], + "span" : 1, + "sort" : 
{ + "desc" : true, + "col" : 0 + }, + "pageSize" : null, + "scroll" : true, + "showHeader" : true, + "columns" : [ + { + "value" : "current", + "text" : "Current" + } + ] + }, + { + "minSpan" : 1, + "type" : "text", + "links" : [], + "title" : "", + "span" : 1, + "id" : 73, + "mode" : "html", + "content" : "", + "transparent" : true + }, + { + "title" : "Object Summary", + "span" : 4, + "targets" : [ + { + "select" : [ + [ + { + "params" : [ + "value" + ], + "type" : "field" + }, + { + "type" : "mean", + "params" : [] + } + ] + ], + "textEditor" : true, + "target" : "alias(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_object), 'Total Objects')", + "groupBy" : [ + { + "params" : [ + "$__interval" + ], + "type" : "time" + }, + { + "params" : [ + "null" + ], + "type" : "fill" + } + ], + "policy" : "default", + "dsType" : "influxdb", + "refId" : "A", + "alias" : "Objects", + "tags" : [], + "resultFormat" : "time_series", + "measurement" : "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object" + }, + { + "tags" : [], + "resultFormat" : "time_series", + "measurement" : "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "select" : [ + [ + { + "type" : "field", + "params" : [ + "value" + ] + }, + { + "params" : [], + "type" : "mean" + } + ] + ], + "textEditor" : true, + "dsType" : "influxdb", + "groupBy" : [ + { + "params" : [ + "$__interval" + ], + "type" : "time" + }, + { + "params" : [ + "null" + ], + "type" : "fill" + } + ], + "policy" : "default", + "target" : "alias(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_object_degraded), 'Objects degraded')", + "refId" : "B", + "alias" : "Objects" + }, + { + "resultFormat" : "time_series", + "measurement" : "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "tags" : [], + "refId" : "C", + "alias" : "Objects", + "target" : "alias(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_object_misplaced), 
'Objects misplaced')", + "policy" : "default", + "dsType" : "influxdb", + "groupBy" : [ + { + "params" : [ + "$__interval" + ], + "type" : "time" + }, + { + "params" : [ + "null" + ], + "type" : "fill" + } + ], + "select" : [ + [ + { + "type" : "field", + "params" : [ + "value" + ] + }, + { + "type" : "mean", + "params" : [] + } + ] + ], + "textEditor" : true + }, + { + "alias" : "Objects", + "refId" : "D", + "groupBy" : [ + { + "type" : "time", + "params" : [ + "$__interval" + ] + }, + { + "params" : [ + "null" + ], + "type" : "fill" + } + ], + "policy" : "default", + "dsType" : "influxdb", + "target" : "alias(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_object_unfound), 'Objects unfound')", + "textEditor" : true, + "select" : [ + [ + { + "params" : [ + "value" + ], + "type" : "field" + }, + { + "params" : [], + "type" : "mean" + } + ] + ], + "resultFormat" : "time_series", + "measurement" : "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "tags" : [] + } + ], + "id" : 10, + "pageSize" : null, + "sort" : { + "desc" : false, + "col" : null + }, + "showHeader" : true, + "scroll" : true, + "columns" : [ + { + "text" : "Current", + "value" : "current" + } + ], + "filterNull" : false, + "links" : [], + "transform" : "timeseries_aggregations", + "fontSize" : "100%", + "type" : "table", + "minSpan" : 3, + "styles" : [ + { + "type" : "date", + "pattern" : "Time", + "dateFormat" : "YYYY-MM-DD HH:mm:ss" + }, + { + "thresholds" : [], + "type" : "number", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "alias" : "Object State", + "unit" : "short", + "decimals" : 2, + "colorMode" : null, + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern" : "Metric" + }, + { + "type" : "number", + "thresholds" : [], + "unit" : "none", + "decimals" : 0, + "colorMode" : null, + "alias" : "Count", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "colors" : [ + "rgba(245, 54, 54, 0.9)", + 
"rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern" : "Current" + }, + { + "thresholds" : [], + "type" : "number", + "pattern" : "/.*/", + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "colorMode" : null, + "decimals" : 0, + "unit" : "none" + } + ], + "datasource" : "Local" + }, + { + "minSpan" : 1, + "type" : "text", + "links" : [], + "span" : 1, + "title" : "", + "id" : 67, + "mode" : "markdown", + "content" : "", + "transparent" : true + }, + { + "minSpan" : 3, + "type" : "table", + "fontSize" : "100%", + "transform" : "timeseries_aggregations", + "links" : [], + "datasource" : "Local", + "styles" : [ + { + "pattern" : "Time", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "type" : "date" + }, + { + "decimals" : 2, + "colorMode" : null, + "unit" : "short", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "alias" : "PG State", + "type" : "number", + "thresholds" : [], + "pattern" : "Metric", + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ] + }, + { + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern" : "Current", + "alias" : "Count", + "dateFormat" : "YYYY-MM-DD HH:mm:ss", + "unit" : "none", + "decimals" : 0, + "colorMode" : null, + "thresholds" : [], + "type" : "number" + }, + { + "thresholds" : [], + "type" : "number", + "colors" : [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "pattern" : "/.*/", + "unit" : "none", + "colorMode" : null, + "decimals" : 0 + } + ], + "sort" : { + "desc" : false, + "col" : null + }, + "pageSize" : null, + "id" : 13, + "span" : 3, + "title" : "PG Summary", + "targets" : [ + { + "tags" : [], + "measurement" : "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "resultFormat" : "time_series", + "textEditor" : true, + "select" : [ + [ + { + "params" : [ + "value" + ], + "type" : 
"field" + }, + { + "params" : [], + "type" : "mean" + } + ] + ], + "dsType" : "influxdb", + "policy" : "default", + "groupBy" : [ + { + "type" : "time", + "params" : [ + "$__interval" + ] + }, + { + "params" : [ + "null" + ], + "type" : "fill" + } + ], + "target" : "alias(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg), 'PGs')", + "refId" : "A", + "alias" : "PG's" + }, + { + "target" : "alias(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active), 'Active PGs')", + "policy" : "default", + "dsType" : "influxdb", + "groupBy" : [ + { + "type" : "time", + "params" : [ + "$__interval" + ] + }, + { + "type" : "fill", + "params" : [ + "null" + ] + } + ], + "textEditor" : true, + "select" : [ + [ + { + "params" : [ + "value" + ], + "type" : "field" + }, + { + "params" : [], + "type" : "mean" + } + ] + ], + "refId" : "B", + "alias" : "PG's", + "tags" : [], + "measurement" : "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "resultFormat" : "time_series" + }, + { + "tags" : [], + "resultFormat" : "time_series", + "measurement" : "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "select" : [ + [ + { + "params" : [ + "value" + ], + "type" : "field" + }, + { + "params" : [], + "type" : "mean" + } + ] + ], + "textEditor" : true, + "target" : "alias(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean), 'Active+clean PGs')", + "dsType" : "influxdb", + "policy" : "default", + "groupBy" : [ + { + "params" : [ + "$__interval" + ], + "type" : "time" + }, + { + "type" : "fill", + "params" : [ + "null" + ] + } + ], + "refId" : "C", + "alias" : "PG's" + }, + { + "resultFormat" : "time_series", + "measurement" : "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "tags" : [], + "alias" : "PG's", + "refId" : "D", + "textEditor" : true, + "select" : [ + [ + { + "type" : "field", + "params" : [ + "value" + ] + }, + { + "params" : [], + "type" : "mean" + 
} + ] + ], + "groupBy" : [ + { + "type" : "time", + "params" : [ + "$__interval" + ] + }, + { + "params" : [ + "null" + ], + "type" : "fill" + } + ], + "dsType" : "influxdb", + "policy" : "default", + "target" : "alias(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_peering), 'PGs peering')" + } + ], + "filterNull" : false, + "columns" : [ + { + "value" : "current", + "text" : "Current" + } + ], + "scroll" : true, + "showHeader" : true + } + ], + "titleSize" : "h5", + "repeatIteration" : null + } + ], + "hideControls" : true, + "title" : "Ceph Health", + "timepicker" : { + "refresh_intervals" : [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options" : [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "id" : 29, + "annotations" : { + "list" : [ + { + "type" : "alert", + "iconColor" : "rgba(255, 96, 96, 1)", + "showIn" : 0, + "enable" : true, + "datasource" : "Local", + "tags" : "health_alert", + "name" : "health_alert", + "limit" : 100, + "hide" : false + }, + { + "datasource" : "Local", + "tags" : "health_ok", + "enable" : true, + "name" : "health_ok", + "limit" : 100, + "hide" : false, + "type" : "alert", + "iconColor" : "rgb(1, 195, 0)", + "showIn" : 0 + } + ] + }, + "schemaVersion" : 14, + "timezone" : "browser", + "graphTooltip" : 0, + "refresh" : "10s", + "version" : 43, + "editable" : false, + "links" : [ + { + "keepTime" : true, + "tags" : [ + "overview" + ], + "includeVars" : true, + "asDropdown" : true, + "targetBlank" : true, + "icon" : "external link", + "title" : "Shortcuts", + "type" : "dashboards" + } + ], + "style" : "dark", + "time" : { + "to" : "now", + "from" : "now-1h" + }, + "gnetId" : null, + "tags" : [ + "overview" + ] + }, + "meta" : { + "updatedBy" : "admin", + "canEdit" : true, + "createdBy" : "admin", + "type" : "db", + "canSave" : true, + "updated" : "2017-08-21T03:34:51Z", + "expires" : "0001-01-01T00:00:00Z", + "canStar" : 
true, + "version" : 43, + "created" : "2017-08-08T03:07:37Z", + "slug" : "ceph-health" + } +} diff --git a/dashboards/cephmetrics-graphite/ceph-osd-information.json b/dashboards/cephmetrics-graphite/ceph-osd-information.json new file mode 100644 index 0000000..0d9cc6a --- /dev/null +++ b/dashboards/cephmetrics-graphite/ceph-osd-information.json @@ -0,0 +1,2178 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 67, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "220px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "hideTimeOverride": true, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd), \"max\"))", + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + 
"title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "hideTimeOverride": true, + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_up),\"max\"))", + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "OSDs UP", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(251,251,251,0.97)", + "rgba(255,165,0, 1)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Local", + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "hideTimeOverride": true, + "id": 16, + "interval": 
null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "hide": true, + "refId": "A", + "target": "keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_up),\"max\"))", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd), \"max\"))", + "textEditor": true + }, + { + "refId": "C", + "target": "diffSeries(#B,#A)", + "targetFull": "diffSeries(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd), \"max\")),keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_up),\"max\")))", + "textEditor": true + } + ], + "thresholds": "1,3", + "timeFrom": "1m", + "title": "OSDs DOWN", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "Local", + "description": "The pie chart shows the various disk sizes used within the cluster", + "fontSize": "80%", + "format": "none", + "height": "220", + "hideTimeOverride": true, + "id": 13, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "sortDesc": true, + "values": true + }, + 
"legendType": "Right side", + "links": [], + "maxDataPoints": "", + "minSpan": 3, + "nullPointMode": "connected", + "pieType": "pie", + "span": 3, + "strokeWidth": "1", + "targets": [ + { + "hide": true, + "refId": "A", + "target": "currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0)", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "alias(currentBelow(#A,1099511627776),\"<1TB\")", + "targetFull": "alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),1099511627776),\"<1TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "C", + "target": "alias(currentBelow(currentAbove(#A,1099511627776),2199023255552),\"2TB\")", + "targetFull": "alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),1099511627776),2199023255552),\"2TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "Q", + "target": "alias(currentBelow(currentAbove(#A,2199023255552),3298534883328),\"3TB\")", + "targetFull": "alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),2199023255552),3298534883328),\"3TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "D", + "target": "alias(currentBelow(currentAbove(#A,3298534883328),4398046511104),\"4TB\")", + "targetFull": "alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),3298534883328),4398046511104),\"4TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "E", + "target": "alias(currentBelow(currentAbove(#A,4398046511104),6597069766656),\"6TB\")", + "targetFull": "alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),4398046511104),6597069766656),\"6TB\")", + "textEditor": 
true + }, + { + "hide": true, + "refId": "F", + "target": "alias(currentBelow(currentAbove(#A,6597069766656),8796093022208),\"8TB\")", + "targetFull": "alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),6597069766656),8796093022208),\"8TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "G", + "target": "alias(currentBelow(currentAbove(#A,8796093022208),10995116277760),\"10TB\")", + "targetFull": "alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),8796093022208),10995116277760),\"10TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "H", + "target": "alias(currentBelow(currentAbove(#A,10995116277760),13194139533312),\"12TB\")", + "targetFull": "alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),10995116277760),13194139533312),\"12TB\")", + "textEditor": true + }, + { + "refId": "I", + "target": "alias(countSeries(#B), \"<1TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),1099511627776),\"<1TB\")), \"<1TB\")", + "textEditor": true + }, + { + "refId": "J", + "target": "alias(countSeries(#C), \"2TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),1099511627776),2199023255552),\"2TB\")), \"2TB\")", + "textEditor": true + }, + { + "refId": "K", + "target": "alias(countSeries(#D), \"4TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),3298534883328),4398046511104),\"4TB\")), \"4TB\")", + "textEditor": true + }, + { + "refId": "L", + "target": 
"alias(countSeries(#E), \"6TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),4398046511104),6597069766656),\"6TB\")), \"6TB\")", + "textEditor": true + }, + { + "refId": "M", + "target": "alias(countSeries(#F), \"8TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),6597069766656),8796093022208),\"8TB\")), \"8TB\")", + "textEditor": true + }, + { + "refId": "N", + "target": "alias(countSeries(#G), \"10TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),8796093022208),10995116277760),\"10TB\")), \"10TB\")", + "textEditor": true + }, + { + "refId": "O", + "target": "alias(countSeries(#H), \"12TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),10995116277760),13194139533312),\"12TB\")), \"12TB\")", + "textEditor": true + }, + { + "refId": "P", + "target": "alias(countSeries(#Q), \"3TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes),0),2199023255552),3298534883328),\"3TB\")), \"3TB\")", + "textEditor": true + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "OSD Disk Size Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "fontSize": "100%", + "hideTimeOverride": true, + "id": 18, + "links": [], + "maxDataPoints": "", + "minSpan": 2, + "pageSize": 50, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 2, + "styles": [ 
+ { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Disk Size", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "decbytes" + }, + { + "alias": "Hostname.OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.stat_bytes),0),1),1,-2)", + "textEditor": true + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "OSD Size", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "aliasColors": { + "Non-Encrypted": "#E5AC0E" + }, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "fontSize": "80%", + "format": "none", + "height": "200px", + "hideTimeOverride": true, + "id": 19, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "1", + "minSpan": 2, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": 1, + "targets": [ + { + "hide": true, + "refId": "C", + "target": 
"currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.encrypted),-1),0)", + "textEditor": true + }, + { + "hide": false, + "refId": "D", + "target": "alias(countSeries(currentAbove(#C,0.5)),\"Encrypted\")", + "targetFull": "alias(countSeries(currentAbove(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.encrypted),-1),0),0.5)),\"Encrypted\")", + "textEditor": true + }, + { + "refId": "E", + "target": "alias(countSeries(currentBelow(#C,0.5)),\"Non-Encrypted\")", + "targetFull": "alias(countSeries(currentBelow(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.encrypted),-1),0),0.5)),\"Non-Encrypted\")", + "textEditor": true + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "OSD Encryption Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + "Non-Encrypted": "#E5AC0E" + }, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "fontSize": "80%", + "format": "none", + "height": "200px", + "hideTimeOverride": true, + "id": 20, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "1", + "minSpan": 2, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": 1, + "targets": [ + { + "hide": true, + "refId": "C", + "target": "currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id_hidden.osd_type),-1),0)", + "textEditor": true + }, + { + "hide": false, + "refId": "D", + "target": "alias(countSeries(currentAbove(#C,0.5)), \"Bluestore\")", + "targetFull": "alias(countSeries(currentAbove(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id_hidden.osd_type),-1),0),0.5)), \"Bluestore\")", + "textEditor": 
true + }, + { + "hide": false, + "refId": "E", + "target": "alias(countSeries(currentBelow(#C,0.5)), \"Filestore\")", + "targetFull": "alias(countSeries(currentBelow(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id_hidden.osd_type),-1),0),0.5)), \"Filestore\")", + "textEditor": true + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "Summary of OSD Types", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Summary", + "titleSize": "h5" + }, + { + "collapse": true, + "height": "500", + "panels": [ + { + "content": "

Ceph Filestore I/O Process

\n

\nA write request is first committed to a journal using direct-io (apply). Once this write is complete, the data is persisted to HDD by a second 'buffered' write operation (commit). The commit operation is basically a measure of the time taken to perform a syncfs call to flush dirty pages to disk, and is therefore not a time associated with any specific client-initiated operation.

The tables on the right show the top 10 OSDs with the highest latencies.\n", + "height": "300", + "id": 10, + "links": [], + "minSpan": 3, + "mode": "html", + "span": 3, + "title": "", + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Filestore OSDs", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 27, + "links": [], + "minSpan": 1, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 1, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(currentBelow(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.osd_type),-1),0.5),-2)", + "textEditor": true + } + ], + 
"timeFrom": "2m", + "timeShift": null, + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Time spent in the queue for the journal. Excessive times here may indicate OSD throttling is happening. In this scenario you should review the OSD specific settings in \"ceph.conf\"; filestore_queue_max_ops or filestore_queue_max_bytes", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 3, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Journal Queue Time", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + ".001", + ".003" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.queue_transaction_latency_avg),0),$max_devices),-2)", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, +
{ + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Time taken for the write request to be safely committed to the journal device", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 4, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Journal Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + "0.01", + "0.1" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.journal_latency),0),$max_devices),-2)", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Apply latency covers the time taken to commit to the journal and complete the transaction", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 5, + "links": [], + "minSpan": 2, + 
"pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Apply Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + "100", + "500" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.apply_latency),0),$max_devices),-2)", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Commit latency is the time taken for writes to be flushed to disk as part of async kernel activity", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 6, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Commit Latency", + "colorMode": 
"row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + "1", + "3" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.commitcycle_latency),0),$max_devices),-2)", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "aliasColors": { + "95%ile Commit Latency": "#447EBC", + "Apply Latency Max": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "Shows the latency for a given OSD, allowing you to compare a specific OSD against the $percentile%ile graph. 
Note that when the \"OSD Id\" pull-down shows **ALL**, only the **first** OSD is shown to prevent the graph from being unreadable!", + "fill": 0, + "height": "300px", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": "", + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Apply Latency Max", + "fill": 0 + }, + { + "alias": "95%ile Apply Latency", + "fill": 2 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.queue_transaction_latency_avg),0),1),\"Journal queue avg\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.journal_latency),0),1),\"Journal latency avg\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.apply_latency),0),1), \"Apply latency avg\")", + "textEditor": true + }, + { + "refId": "D", + "target": "alias(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.commitcycle_latency),0),1),\"Commit latency avg\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filestore Latency for OSD '$osd_id'", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": 
null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "height": "300px", + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.queue_transaction_latency_avg,$percentile), \"journal Queue time\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.journal_latency,$percentile), \"journal Latency\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.apply_latency,$percentile), \"apply Latency\")", + "textEditor": true + }, + { + "refId": "D", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.commitcycle_latency,$percentile), \"commit/flush Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filestore IO Summary - all OSD's @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Filestore OSD Latencies", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "content": "

Ceph Bluestore I/O Process

\n

\nUnlike filestore, bluestore does not suffer from a double-write penalty (i.e., write to journal, then write to HDD). With bluestore, once a write is scheduled (submit and throttle latencies), it is done directly to the disk (AIO wait), and then the metadata relating to the object is changed (kv_commit). Writes are not considered complete until the kv store is updated.

The tables on the right focus on the top 10 Bluestore OSDs with the highest latencies.\n", + "height": "300", + "id": 22, + "links": [], + "minSpan": 3, + "mode": "html", + "span": 3, + "title": "", + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Bluestore OSDs", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 26, + "links": [], + "minSpan": 1, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 1, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.osd_type),-1),0.5),-2)", + "textEditor": true 
+ } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Time spent preparing the request (transaction)", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 23, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Submit Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + ".001", + ".003" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.submit_lat),0),$max_devices),-2)", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Time requests wait due to throttling or busy conditions", + "fontSize": "100%", + "height": "310", + 
"hideTimeOverride": true, + "id": 24, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Throttle Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + ".002", + ".005" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [ + "" + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.throttle_lat),0),$max_devices),-2)", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Time spent waiting for the physical I/O request to complete", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 29, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { 
+ "alias": "AIO Wait Time", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + ".020", + ".050" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.state_aio_wait_lat),0),$max_devices),-2)", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Time spent waiting for rocksdb (metadata store) to commit meta data", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 25, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "KV Commit ", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + ".003", + ".005" + ], + "type": "number", + "unit": 
"s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.kv_commit_lat),0),$max_devices),-2)", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "height": "300", + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.submit_lat),0),$max_devices),\"submit latency\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.throttle_lat),0),$max_devices),\"throttle latency\")", + "textEditor": true + }, + { + "refId": "C", + "target": 
"alias(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.state_aio_wait_lat),0),$max_devices),\"AIO Wait latency\")", + "textEditor": true + }, + { + "refId": "D", + "target": "alias(limit(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.kv_commit_lat),0),$max_devices),\"KV Commit Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Bluestore Latency for OSD '$osd_id'", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "This charts shows the $percentile%ile latencies across all OSDs, which indicates overall performance, but does not represent any specific OSD", + "fill": 1, + "height": "300px", + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.submit_lat,$percentile), \"Submit Latency\")", + "textEditor": true + }, + { + "refId": "B", + "target": 
"alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.throttle_lat,$percentile), \"Throttle Latency\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.state_aio_wait_lat,$percentile), \"IO Wait Latency\")", + "textEditor": true + }, + { + "refId": "D", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.kv_commit_lat,$percentile), \"KV Commit Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "BlueStore IO Summary - all OSD's @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bluestore OSD Latencies", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": 
"collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "96", + "value": "96" + }, + { + "selected": false, + "text": "97", + "value": "97" + }, + { + "selected": false, + "text": "98", + "value": "98" + }, + { + "selected": false, + "text": "99", + "value": "99" + } + ], + "query": "95,96,97,98,99", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "10", + "value": "10" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "max_devices", + "options": [ + { + "selected": true, + "text": "10", + "value": "10" + } + ], + "query": "10", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Local", + "hide": 0, + "includeAll": true, + "label": "OSD Id", + "multi": false, + "name": "osd_id", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*", + "refresh": 1, + "regex": "/^\\d+$/", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Local", + "hide": 2, + "includeAll": true, + "label": "OSD Id", + "multi": false, + "name": "osd_id_hidden", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*", + "refresh": 1, + "regex": "/^\\d+$/", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + 
"to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph OSD Information", + "version": 3 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-10-11T02:45:33Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "ceph-osd-information", + "type": "db", + "updated": "2017-10-11T03:22:46Z", + "updatedBy": "admin@localhost", + "version": 3 + } +} \ No newline at end of file diff --git a/dashboards/cephmetrics-graphite/ceph-pools.json b/dashboards/cephmetrics-graphite/ceph-pools.json new file mode 100644 index 0000000..853590d --- /dev/null +++ b/dashboards/cephmetrics-graphite/ceph-pools.json @@ -0,0 +1,2867 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 68, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 5, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.op_per_sec,-2,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": 
"Client IOPS by Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 5, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.bytes_sec,-2,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput by Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 3, + "id": 52, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false 
+ }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec,-2,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Workload by Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Pool Overview : $pool_name", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "fontSize": "100%", + "id": 3, + "links": [], + "minSpan": 6, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 4, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Pool Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 
0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "limit(sortByMaxima(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.op_per_sec,-2,\"maxSeries\")),5)", + "textEditor": true + } + ], + "title": "Top 5 Pools by Client IOPS", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "fontSize": "100%", + "id": 4, + "links": [], + "minSpan": 6, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 4, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Pool Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Throughput", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "bytes" + } + ], + 
"targets": [ + { + "refId": "A", + "target": "limit(sortByMaxima(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.bytes_sec,-2,\"maxSeries\")),5)", + "textEditor": true + } + ], + "title": "Top 5 Pools by Throughput", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "fontSize": "100%", + "id": 59, + "links": [], + "minSpan": 6, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 4, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Pool Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Capacity Used", + "colorMode": "value", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percent" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "bytes" + } + ], + "targets": [ + { + "refId": "A", + "target": "limit(sortByMaxima(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.percent_used,-2,'maxSeries')),5)", + "textEditor": true + } + ], + "title": "Top 5 Pools by Capacity Used", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 5", + "titleSize": 
"h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "_rgw_root", + "value": "_rgw_root" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + 
}, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "_rgw_root", + "value": "_rgw_root" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": "pool_name", + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 60, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_buckets_data", + "value": "default_rgw_buckets_data" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 61, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_buckets_data", + "value": "default_rgw_buckets_data" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": 
"aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1507692845646, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 62, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_buckets_index", + "value": "default_rgw_buckets_index" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { 
+ "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 63, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_buckets_index", + "value": "default_rgw_buckets_index" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", 
+ "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1507692845646, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 64, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_control", + "value": "default_rgw_control" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + 
"label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_control", + "value": "default_rgw_control" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1507692845646, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, 
+ { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 66, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_data_root", + "value": "default_rgw_data_root" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_data_root", + "value": "default_rgw_data_root" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1507692845646, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_gc", + "value": "default_rgw_gc" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 69, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_gc", + "value": "default_rgw_gc" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": 
"aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1507692845646, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 70, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_log", + "value": "default_rgw_log" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { + "refId": "B", + 
"target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 71, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_log", + "value": "default_rgw_log" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + 
"mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1507692845646, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 72, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_meta", + "value": "default_rgw_meta" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": 
null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 73, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_meta", + "value": "default_rgw_meta" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1507692845646, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + 
"panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 74, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_users_keys", + "value": "default_rgw_users_keys" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 75, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_users_keys", + "value": "default_rgw_users_keys" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1507692845646, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 76, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_users_swift", + "value": "default_rgw_users_swift" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 77, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_users_swift", + "value": "default_rgw_users_swift" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": 
"aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1507692845646, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 78, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_users_uid", + "value": "default_rgw_users_uid" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { + 
"refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 79, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "default_rgw_users_uid", + "value": "default_rgw_users_uid" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": 
{ + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1507692845646, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 80, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "rbd", + "value": "rbd" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, 
+ "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 81, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "rbd", + "value": "rbd" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1507692845646, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": 
{ + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 0, + "includeAll": false, + "label": "Cluster Name", + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": "Local", + "hide": 0, + "includeAll": true, + "label": "Pool Name", + "multi": true, + "name": "pool_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Pools", + "version": 5 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-10-11T02:45:33Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "ceph-pools", + "type": "db", + "updated": "2017-10-11T03:35:11Z", + "updatedBy": "admin@localhost", + "version": 5 + } +} \ No newline at end of file diff --git 
a/dashboards/cephmetrics-graphite/ceph-rgw-workload.json b/dashboards/cephmetrics-graphite/ceph-rgw-workload.json new file mode 100644 index 0000000..7a63c74 --- /dev/null +++ b/dashboards/cephmetrics-graphite/ceph-rgw-workload.json @@ -0,0 +1,1744 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": 18, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 267, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "height": "250", + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": 
"sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount)", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum)", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "alias(divideSeries(sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum), sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount)),\"GET\")", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount)", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + 
"groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum)", + "textEditor": true + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": "alias(divideSeries(sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum),sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount)), \"PUT\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average Request Latency - All RADOS Gateways", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": 0, + "description": "CPU Busy at $percentile%ile across all radosgw hosts", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + 
"thresholdMarkers": true + }, + "height": "250", + "id": 38, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(groupByNode(collectd.$rgw_servers.$domain.cpu.percent.{system,user,wait,interrupt},1,\"sumSeries\"),$percentile)", + "textEditor": true + } + ], + "thresholds": "70,90", + "title": "RGW Hosts CPU Busy", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "Failed HTTP Requests": "#0A437C" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the number of failed/aborted requests across all rados gateways during the last 10secs", + "fill": 3, + "height": "250", + "id": 152, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": 
"groupByNode(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.failed_req, 1, \"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Failed HTTP Requests", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "250", + "id": 45, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.qlen)", + "textEditor": true + } + ], + "thresholds": "", + "title": "Request Queue Length", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + 
"text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "height": "290", + "id": 34, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "C", + "target": "groupByNode(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.{get,put}, 1, \"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Requests/sec - All RADOS Gateways", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "Total of GET/PUT HTTP traffic through all radosgw hosts", + "fill": 1, + "height": "290", + "id": 39, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "hide": false, + "refId": "C", + "target": "groupByNode(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.{get_b,put_b}, 1, \"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Request Size - All RADOS Gateways", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RadosGW Overview - All Gateways", + "titleSize": "h5" + }, + { + "collapse": true, + "height": "300", + "panels": [ + { + "content": "", + "height": "150", + "id": 50, + "links": [], + "minSpan": 1, + "mode": "markdown", + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "span": 1, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180", + "id": 43, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 
2, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(collectd.$rgw_servers.$domain.cpu.percent.{user,system,wait,steal,softirq,interrupt})", + "textEditor": true + } + ], + "thresholds": "", + "title": "CPU Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "RAM Utilization on $rgw_servers - including a usage history covering the 12 hours", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180", + "hideTimeOverride": true, + "id": 46, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "span": 2, + "sparkline": { 
+ "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "collectd.$rgw_servers.$domain.memory.percent.used", + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "12h", + "timeShift": null, + "title": "RAM Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the total network load on the rados gw host", + "fill": 1, + "height": "180", + "id": 150, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(sumSeries(consolidateBy(collectd.$rgw_servers.$domain.interface.{en,eth,bond}*.if_octets.rx, \"sum\")),-1)", + "textEditor": true + }, + { + "refId": "B", + "target": "aliasByNode(sumSeries(consolidateBy(collectd.$rgw_servers.$domain.interface.{en,eth,bond}*.if_octets.tx, \"sum\")),-1)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + 
"format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "Failed HTTP Requests": "#0A437C" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the number of failed/aborted requests on this rados gateway during the last 10secs", + "fill": 3, + "height": "180", + "id": 151, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.failed_req, \"Failed HTTP Requests\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Failed HTTP Requests", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 25, + 
"legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get, 'GET')", + "textEditor": true + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + 
"tags": [], + "target": "alias(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put, 'PUT')", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": 
"collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "alias(divideSeries(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum, collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount),\"GET\")", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + 
"params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum" + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": "alias(divideSeries(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum,collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount), \"PUT\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 40, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get_b, 'GET')", + "textEditor": true + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put_b, 'PUT')", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "HTTP GET/PUT Request Sizes", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + 
"xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": "rgw_servers", + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host : $rgw_servers", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "obj-rgw-1", + "value": "obj-rgw-1" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "rgw_servers", + "options": [ + { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + ], + "query": "obj-rgw-1", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": true, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + 
"value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "90,95,98", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph RGW Workload", + "version": 1 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-01T05:18:58Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "ceph-rgw-workload", + "type": "db", + "updated": "2017-08-01T20:58:52Z", + "updatedBy": "admin@localhost", + "version": 1 + } +} diff --git a/dashboards/cephmetrics-graphite/disk-busy-by-server.json b/dashboards/cephmetrics-graphite/disk-busy-by-server.json new file mode 100644 index 0000000..0df379e --- /dev/null +++ b/dashboards/cephmetrics-graphite/disk-busy-by-server.json @@ -0,0 +1,594 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 10, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + 
"span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util,1,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Hosts Disk Utilization Peak", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All OSD Hosts", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": 
"alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util),$percentile),\"all disk busy @$percentile%ile\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilisation @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501621235322, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util),$percentile),\"all disk busy @$percentile%ile\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + 
"title": "$osd_servers Disk Utilisation @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501621235322, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util),$percentile),\"all disk busy @$percentile%ile\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilisation @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + 
}, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Host's $percentile%ile Disk Utilisation", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "ceph_cluster", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "85", + "value": "85" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + 
"value": "98" + } + ], + "query": "80,85,90,95,98", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Disk Busy by Server", + "version": 2 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-01T05:18:58Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "disk-busy-by-server", + "type": "db", + "updated": "2017-08-01T21:02:25Z", + "updatedBy": "admin@localhost", + "version": 2 + } +} \ No newline at end of file diff --git a/dashboards/cephmetrics-graphite/iops-by-server.json b/dashboards/cephmetrics-graphite/iops-by-server.json new file mode 100644 index 0000000..a423852 --- /dev/null +++ b/dashboards/cephmetrics-graphite/iops-by-server.json @@ -0,0 +1,553 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 12, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 
250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops,1,\"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Server IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All Servers by IOPS", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers 
Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"total IOPS for all disks on $osd_servers\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Total OSD IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501622535508, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-2", + 
"value": "obj-osd-2" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"total IOPS for all disks on $osd_servers\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Total OSD IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501622535508, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"total IOPS for 
all disks on $osd_servers\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Total OSD IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Node's IOPS Load", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": 
null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "IOPS by Server", + "version": 4 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-01T05:18:58Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "iops-by-server", + "type": "db", + "updated": "2017-08-01T21:23:04Z", + "updatedBy": "admin@localhost", + "version": 4 + } +} \ No newline at end of file diff --git a/dashboards/cephmetrics-graphite/iscsi-overview.json b/dashboards/cephmetrics-graphite/iscsi-overview.json new file mode 100644 index 0000000..ed96f78 --- /dev/null +++ b/dashboards/cephmetrics-graphite/iscsi-overview.json @@ -0,0 +1,1894 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 28, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "200px", + "panels": [ + { + "content": "", + "height": "100", + "id": 19, + "links": [], + "minSpan": 2, + "mode": "markdown", + "span": 2, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + 
"colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "100", + "hideTimeOverride": true, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "1", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_stats.tpg_count), \"max\"))", + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "Gateways", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "100", + "hideTimeOverride": true, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + 
"value": 2 + } + ], + "maxDataPoints": "1", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_stats.client_count), \"max\"))", + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "Clients", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "description": "Sessions shows the number of iSCSI clients currently logged in to the gateway group", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "100", + "hideTimeOverride": true, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + 
"targets": [ + { + "refId": "A", + "target": "maxSeries(keepLastValue(consolidateBy(collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_stats.sessions, \"max\")))", + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "Sessions", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "Capacity refers to the total capacity defined within the iSCSI gateway group, and available to iSCSI clients", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "100", + "hideTimeOverride": true, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "hide": false, + "refId": "A", + "target": "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_stats.capacity), \"max\"))", + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "Defined Capacity", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": 
"=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "100", + "hideTimeOverride": true, + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "maxSeries(consolidateBy(collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_stats.lun_count, \"max\"))", + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "LUNs", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "100", + "hideTimeOverride": true, + "id": 18, + "interval": null, + "links": [], + "mappingType": 1, + 
"mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "hide": true, + "refId": "A", + "target": "maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_stats.lun_count), \"max\"))", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "alias(countSeries(groupByNode(currentAbove(collectd.$iscsi_gateways.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_clients.$clients.luns.*.size,0.5),-2,\"maxSeries\")),\"mapped\")", + "textEditor": true + }, + { + "refId": "C", + "target": "diffSeries(#A,#B)", + "targetFull": "diffSeries(maxSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_stats.lun_count), \"max\")),alias(countSeries(groupByNode(currentAbove(collectd.$iscsi_gateways.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_clients.$clients.luns.*.size,0.5),-2,\"maxSeries\")),\"mapped\"))", + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "Unused LUNs", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "", + "height": "100", + "id": 22, + "links": [], + "minSpan": 3, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "content": "", + "height": "100", + "id": 23, + "links": [], + "minSpan": 1, + "mode": "markdown", + "span": 1, + "title": "", + "transparent": 
true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "Total IOPS across all iSCSI gateways", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "200", + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.derive.$cluster_name.iscsi.gw_stats.iops), \"max\"))", + "textEditor": true + } + ], + "thresholds": "", + "title": "IOPS", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "Total read/write throughput across all iSCSI gateways", + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "200", + "id": 7, + "interval": null, + "links": [], + 
"mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.derive.$cluster_name.iscsi.gw_stats.total_bytes_per_sec), \"max\"))", + "textEditor": true + } + ], + "thresholds": "", + "title": "Throughput", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "height": "", + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$iscsi_gateways.$domain.interface.{bond,en,eth}*.if_octets.{tx,rx},1, \"sum\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load by Gateway", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": 
true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "fontSize": "80%", + "format": "short", + "height": "200", + "hideTimeOverride": true, + "id": 26, + "interval": null, + "legend": { + "show": true, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "minSpan": 3, + "nullPointMode": "connected", + "pieType": "pie", + "span": 3, + "strokeWidth": 1, + "targets": [ + { + "hide": true, + "refId": "A", + "target": "currentAbove(keepLastValue(collectd.$iscsi_gateways.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_clients.*.luns.*.active_path),0.5)", + "textEditor": true + }, + { + "refId": "B", + "target": "groupByNode(#A,1,\"sumSeries\")", + "targetFull": "groupByNode(currentAbove(keepLastValue(collectd.$iscsi_gateways.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_clients.*.luns.*.active_path),0.5),1,\"sumSeries\")", + "textEditor": true + } + ], + "timeFrom": "1m", + "title": "Primary LUN Paths Per Gateway", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "iSCSI Gateway Group : $gw_name", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 0, + "height": "", + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$iscsi_gateways.$domain.cephmetrics.derive.$cluster_name.iscsi.gw_stats.iops,1, \"sum\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS Load by Gateway", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 0, + "height": "", + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$iscsi_gateways.$domain.cephmetrics.derive.$cluster_name.iscsi.gw_stats.total_bytes_per_sec,1, \"sum\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput by Gateway", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + 
"label": "Throughput", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 0, + "height": "", + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$iscsi_gateways.$domain.cpu.percent.{interrupt,steal,system,user,wait},1, \"sum\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Gateway CPU Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 0, + "height": "", + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, 
+ "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$iscsi_gateways.$domain.memory.percent.used,1, \"sum\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Gateway Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Gateway Load", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 0, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "currentAbove(groupByNode(collectd.*.$domain.cephmetrics.derive.$cluster_name.iscsi.gw_clients.$clients.luns.*.iops,-4,\"sum\"),0)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS by Client", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + 
"format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 0, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "currentAbove(groupByNode(collectd.*.$domain.cephmetrics.derive.$cluster_name.iscsi.gw_clients.$clients.luns.*.total_bytes_per_sec,-4,\"sum\"),0)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput by Client", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 0, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "limit(groupByNode(currentAbove(collectd.*.$domain.cephmetrics.derive.$cluster_name.iscsi.gw_clients.$clients.luns.*.iops,0),-2,\"sum\"),10)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS by RBD Image for Client '$clients'", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 0, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "limit(groupByNode(currentAbove(collectd.*.$domain.cephmetrics.derive.$cluster_name.iscsi.gw_clients.$clients.luns.*.total_bytes_per_sec,0),-2,\"sum\"),10)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput by RBD Image for Client '$clients'", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": 
null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Client Load : '$clients'", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "fontSize": "100%", + "id": 15, + "links": [], + "minSpan": 4, + "pageSize": 10, + "scroll": false, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 4, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Disk (pool-image)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Size", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "groupByNode(maximumAbove(collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_clients.$clients.luns.*.size,0),-2,\"maxSeries\")", + "textEditor": true + } + ], + "title": "RBD Images Masked to '$clients'", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + 
"value": "current" + } + ], + "fontSize": "100%", + "id": 16, + "links": [], + "minSpan": 4, + "pageSize": 10, + "scroll": false, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 4, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Client", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "# Luns Masked", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "groupByNode(currentAbove(collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_clients.$clients.lun_count,0),-2,\"maxSeries\")", + "textEditor": true + } + ], + "title": "Luns Per Client", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "fontSize": "100%", + "id": 17, + "links": [], + "maxDataPoints": "1", + "minSpan": 4, + "pageSize": 10, + "scroll": false, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 4, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Client", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + 
"rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Capacity", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "sumSeries(groupByNode(keepLastValue(collectd.$iscsi_gateways.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_clients.$clients.luns.*.size),-2,\"maxSeries\"))", + "textEditor": true + } + ], + "title": "Client '$clients' - Masked Capacity", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Client Configuration : '$clients'", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "test.lab", + "value": "test.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "test.lab", + "value": "test.lab" + } + ], + "query": "test.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + 
"tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Local", + "hide": 0, + "includeAll": true, + "label": "iSCSI Client", + "multi": false, + "name": "clients", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_clients.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "iqn-2003-01-com-redhat-iscsi-gw_ceph-gw", + "value": "iqn-2003-01-com-redhat-iscsi-gw_ceph-gw" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "gw_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.$cluster_name.iscsi.gw_name.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "iscsi_gateways", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "rh7-gw1", + "value": "rh7-gw1" + }, + { + "selected": false, + "text": "rh7-gw2", + "value": "rh7-gw2" + } + ], + "query": "rh7-gw1, rh7-gw2", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "iSCSI Overview", + "version": 51 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": 
"2017-08-03T23:35:37Z", + "createdBy": "admin", + "expires": "0001-01-01T00:00:00Z", + "slug": "iscsi-overview", + "type": "db", + "updated": "2017-09-12T04:50:03Z", + "updatedBy": "admin", + "version": 51 + } +} \ No newline at end of file diff --git a/dashboards/cephmetrics-graphite/latency-by-server.json b/dashboards/cephmetrics-graphite/latency-by-server.json new file mode 100644 index 0000000..e932d8e --- /dev/null +++ b/dashboards/cephmetrics-graphite/latency-by-server.json @@ -0,0 +1,573 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 13, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await,1,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Hosts - Highest Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": 
"ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All OSD Hosts", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(maxSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),\"$osd_servers Max Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + 
"min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501621443172, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(maxSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),\"$osd_servers Max Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + 
"lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501621443172, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(maxSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),\"$osd_servers Max Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Host's Max Disk Latency", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + 
"includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": true, + "text": "95", + "value": "95" + } + ], + "query": "95", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Latency by Server", + "version": 3 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": 
"2017-08-01T05:18:58Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "latency-by-server", + "type": "db", + "updated": "2017-08-01T21:04:34Z", + "updatedBy": "admin@localhost", + "version": 3 + } +} \ No newline at end of file diff --git a/dashboards/cephmetrics-graphite/network-usage-by-node.json b/dashboards/cephmetrics-graphite/network-usage-by-node.json new file mode 100644 index 0000000..d58d308 --- /dev/null +++ b/dashboards/cephmetrics-graphite/network-usage-by-node.json @@ -0,0 +1,571 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 20, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "300px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "Shows the total transmit and receive throughput to all interfaces across the Ceph hosts.", + "fill": 1, + "height": "", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "OSD Host Details", + "type": "dashboard" + } + ], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(groupByNode(collectd.$osd_servers.$domain.interface.{bond,en,eth}*.if_octets.*,1,\"sumSeries\")),\"OSD Hosts\")", + 
"textEditor": true + }, + { + "refId": "B", + "target": "alias(sumSeries(groupByNode(collectd.$rgw_servers.$domain.interface.{bond,en,eth}*.if_octets.*,1,\"sumSeries\")),\"RadosGW Hosts\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(sumSeries(groupByNode(collectd.$mon_servers.$domain.interface.{bond,en,eth}*.if_octets.*,1,\"sumSeries\")),\"MON Hosts\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Network Load Across MON, OSD and RADOSGW Hosts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Aggregated Network Load", + "titleSize": "h5" + }, + { + "collapse": true, + "height": "250", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$mon_servers.$domain.interface.{bond,en,eth}*.if_octets.*,1,\"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Ceph MON Network Load", + 
"tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "MON Hosts", + "titleSize": "h5" + }, + { + "collapse": true, + "height": "250", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.interface.{bond,en,eth}*.if_octets.*,1,\"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Host Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Hosts", + 
"titleSize": "h5" + }, + { + "collapse": true, + "height": "250", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$rgw_servers.$domain.interface.{bond,en,eth}*.if_octets.*,1,\"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "RadosGW Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RadosGW Hosts", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 0, + "includeAll": true, + "label": "OSD Hostname", + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "ceph-1", + "value": "ceph-1" + }, + { + "selected": false, + "text": "ceph-2", + "value": "ceph-2" + }, + 
{ + "selected": false, + "text": "ceph-3", + "value": "ceph-3" + } + ], + "query": "ceph-1,ceph-2,ceph-3", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "test.lab", + "value": "test.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "test.lab", + "value": "test.lab" + } + ], + "query": "test.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Local", + "hide": 0, + "includeAll": true, + "label": "Monitor Host", + "multi": false, + "name": "mon_servers", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.mon_status.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "All" + }, + "hide": 0, + "includeAll": true, + "label": "RadosGW Host", + "multi": false, + "name": "rgw_servers", + "options": [ + { + "selected": false, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + ], + "query": "obj-rgw-1", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, 
+ "timezone": "browser", + "title": "Network Usage by Node", + "version": 5 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-03T21:42:28Z", + "createdBy": "admin", + "expires": "0001-01-01T00:00:00Z", + "slug": "network-usage-by-node", + "type": "db", + "updated": "2017-09-12T23:49:40Z", + "updatedBy": "admin", + "version": 5 + } +} \ No newline at end of file diff --git a/dashboards/cephmetrics-graphite/osd-node-detail.json b/dashboards/cephmetrics-graphite/osd-node-detail.json new file mode 100644 index 0000000..3ad8eec --- /dev/null +++ b/dashboards/cephmetrics-graphite/osd-node-detail.json @@ -0,0 +1,988 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 74, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 125, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "160", + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 
193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(keepLastValue(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.num_osds,6))", + "textEditor": true + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "decimals": 0, + "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.", + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "160", + "id": 15, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(consolidateBy(keepLastValue(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6), \"sum\"))", + "textEditor": true + } + ], + "thresholds": "", + "title": "Raw Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": 
"null" + } + ], + "valueName": "current" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "", + "fontSize": "100%", + "height": "160", + "id": 14, + "links": [], + "minSpan": 3, + "pageSize": 1000, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 3, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Host and Disk", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.$device_id.osd_id,1,-2)", + "textEditor": true + } + ], + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "", + "fontSize": "100%", + "height": "160", + "hideTimeOverride": true, + "id": 16, + "links": [], + "maxDataPoints": "1", + "minSpan": 3, + "pageSize": 1000, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 3, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Host and Disk", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + 
"decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Disk Size", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "decbytes" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(keepLastValue(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.$device_id.disk_size),1,-2)", + "textEditor": true + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "'$osd_servers' OSD Overview", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.$device_id.perf.util, $max_devices), -3)", + "textEditor": false + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk utilisation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": 
[] + }, + "yaxes": [ + { + "format": "short", + "label": "%Util", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.$device_id.perf.iops, $max_devices), -3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.$device_id.perf.await, $max_devices), -3)", + "textEditor": false + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "ms", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.$device_id.perf.bytes_per_sec, $max_devices), -3)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Throughput by Disk", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + 
"show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "interrupt": "#447EBC", + "steal": "#6D1F62", + "system": "#890F02", + "user": "#3F6833", + "wait": "#C15C17" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown", + "fill": 3, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "limit(aliasByMetric(collectd.$osd_servers.$domain.cpu.percent.{system,user,wait,steal,nice,interrupt,softirq}),7)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers CPU Utilisation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + 
"total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(collectd.$osd_servers.$domain.interface.{eth,en,bond}*.if_octets.*, 40), -3, -1)", + "textEditor": false + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "'$osd_servers' Performance Statistics", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "tags": [], + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + "hide": 0, + "includeAll": true, + "label": "OSD Host Name", + "multi": false, + "name": "osd_servers", + "options": [ + { + "selected": false, + "text": "All", + "value": "$__all" + }, + { + "selected": true, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": 
"storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": "Local", + "hide": 0, + "includeAll": true, + "label": "Disk Name", + "multi": true, + "name": "device_id", + "options": [], + "query": "collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*", + "refresh": 1, + "regex": "(?!ceph_version|num_osds|\\d).*", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "60", + "value": "60" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "max_devices", + "options": [ + { + "selected": true, + "text": "60", + "value": "60" + } + ], + "query": "60", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "OSD Node Detail", + "version": 3 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-10-11T02:45:34Z", + 
"createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "osd-node-detail", + "type": "db", + "updated": "2017-10-11T03:21:03Z", + "updatedBy": "admin@localhost", + "version": 3 + } +} diff --git a/dashboards/mgr-prometheus/alert-status.json b/dashboards/mgr-prometheus/alert-status.json new file mode 100644 index 0000000..3a56066 --- /dev/null +++ b/dashboards/mgr-prometheus/alert-status.json @@ -0,0 +1,1200 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "id": 1, + "limit": "20", + "links": [], + "onlyAlertsOnDashboard": true, + "show": "current", + "sortOrder": 3, + "span": 12, + "stateFilter": [ + "alerting" + ], + "title": "Active Ceph Alert List", + "type": "alertlist" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "10s", + "handler": 1, + "name": "Overall Ceph Health alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": { + "Ceph Health": "#890F02", + "Ceph Health (0:OK, 4:Warning,8:Error)": "#DEDAF7", + "ceph health": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "The chart plots the clusters 
health, over time. Health is depicted as a integer; 0, 1 or 2 where 0 is OK, 1 is WARN and 2 represents an ERROR state.", + "fill": 1, + "hideTimeOverride": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "maxDataPoints": "360", + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "ceph_health_status", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Ceph Health", + "refId": "A", + "step": 20, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Overall Ceph Health", + "tooltip": { + "shared": false, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "2", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Disks Near Full alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "This 
shows how many disks are at or above 80% full. Performance may degrade beyond this threshold on filestore (XFS) backed OSD's.", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes * 100 > 100", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ceph_daemon}}", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disks Near Full", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "30s", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "10s", + "handler": 1, + "name": "OSDs Down alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + 
"description": "Count of OSDs currently in a DOWN state", + "fill": 2, + "hideTimeOverride": true, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "count(ceph_osd_metadata) - count(ceph_osd_up > 0.5)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "down", + "refId": "A", + "step": 4, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": "5m", + "timeShift": null, + "title": "OSDs Down", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 85 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Cluster Capacity alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + 
"description": "This trigger raises a notification if the raw used crosses the 85% capacity threshold of the ceph cluster", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes_used) / sum(ceph_osd_stat_bytes) * 100 ", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Raw Capacity Used %", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 85 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "PG's Stuck alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + 
"description": "This chart shows whether there are pg's in a stuck state, that need manual intervention to resolve.", + "fill": 2, + "hideTimeOverride": true, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(ceph_osd_numpg) - scalar(ceph_pg_active)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "# pg's stuck inactive", + "refId": "A", + "step": 240, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": "6h", + "timeShift": null, + "title": "PG's Stuck", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + "total" + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "min" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "OSD Host Loss Check alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": { + "Largest OSD Host": "#890F02" + }, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": null, + "description": "This graph checks the cluster @ 90% full is enough to support the loss of the largest OSD host", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(\n sum(ceph_osd_stat_bytes - ceph_osd_stat_bytes_used)\n) * 0.9 -\nmax(\n sum by (instance) (\n ceph_osd_stat_bytes + on (ceph_daemon) group_left (instance) (ceph_disk_occupation*0)\n )\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "lt", + "value": 0 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "OSD Host Loss Check", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 1000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "30s", + "handler": 1, + "name": "Slow OSD responses alert", + "noDataState": "no_data", + 
"notifications": [] + }, + "aliasColors": { + "Largest OSD Host": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Graph checking for OSD Latencies that are above 1s.", + "fill": 1, + "hideTimeOverride": true, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\n (\n irate(node_disk_read_time_ms[5m]) / clamp_min(irate(node_disk_reads_completed[5m]), 0.001) +\n irate(node_disk_write_time_ms[5m]) / clamp_min(irate(node_disk_writes_completed[5m]), 0.001)\n ) and on (instance, device) ceph_disk_occupation\n) >= 1000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}.{{device}}", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1000 + } + ], + "timeFrom": "1h", + "timeShift": null, + "title": "Slow OSD responses", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "ms", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 10 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + 
"reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "30s", + "handler": 1, + "name": "Network Errors alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Checks all interfaces for dropped/error packets, and alerts if more than 10 are seen in a 5m interval", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (\n irate(node_network_receive_drop{device=~\"(eth|en|bond).*\"}[5m]) +\n irate(node_network_receive_errs{device=~\"(eth|en|bond).*\"}[5m]) +\n irate(node_network_transmit_drop{device=~\"(eth|en|bond).*\"}[5m]) +\n irate(node_network_transmit_errs{device=~\"(eth|en|bond).*\"}[5m])\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 10 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } 
+ ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 85 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Pool Capacity alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 5, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail) * 100 + on (pool_id) group_left (name) (ceph_pool_metadata*0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 85 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Pool Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + 
"repeatRowId": null, + "showTitle": true, + "title": "Health Checks", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Alert Status" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/ceph-at-a-glance.json b/dashboards/mgr-prometheus/ceph-at-a-glance.json new file mode 100644 index 0000000..86430a3 --- /dev/null +++ b/dashboards/mgr-prometheus/ceph-at-a-glance.json @@ -0,0 +1,3107 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "showIn": 0, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "145", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "50px", + "id": 1, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "includeVars": true, + "keepTime": true, + "targetBlank": 
true, + "title": "Ceph Cluster", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "target": "" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "Cluster", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "50px", + "id": 2, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + 
"sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "target": "" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "Pools", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "50px", + "id": 3, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-osd-information", + "dashboard": "Ceph OSD Information", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Information", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "target": "" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "OSDs", + "value": "null" + } 
+ ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "50px", + "id": 5, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "target": "" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "OSD Hosts", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "50px", + "id": 6, + "interval": null, + "links": [ + 
{ + "dashUri": "db/network-usage-by-node", + "dashboard": "Network Usage by Node", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Network Usage by Host", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "target": "" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "Network", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "", + "id": 7, + "links": [], + "minSpan": 4, + "mode": "markdown", + "span": 4, + "title": "", + "transparent": true, + "type": "text" + }, + { + "id": 8, + "limit": 10, + "links": [ + { + "dashUri": "db/alert-status", + "dashboard": "Alert Status", + "targetBlank": true, + "title": "Alert Status", + "type": "dashboard" + } + ], + "minSpan": 2, + "onlyAlertsOnDashboard": false, + "show": "current", + "sortOrder": 3, + "span": 2, + "stateFilter": [ + "alerting" + ], + "title": "Active Alerts", + "type": "alertlist" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "225", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(1, 167, 1, 1)", + 
"rgba(255,165,0, 1)", + "rgba(255, 0, 0, 1)" + ], + "datasource": null, + "description": "Shows the overall health of the ceph cluster", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "hideTimeOverride": true, + "id": 9, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-health", + "dashboard": "Ceph Health", + "includeVars": true, + "keepTime": false, + "targetBlank": true, + "title": "Ceph Health", + "type": "dashboard" + } + ], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "0", + "text": "OK", + "to": "0" + }, + { + "from": "1", + "text": "WARN", + "to": "4" + }, + { + "from": "5", + "text": "ERROR", + "to": "99" + }, + { + "from": "-10", + "text": "NODATA", + "to": "0" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "ceph_health_status", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "target": "consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.health),\"max\")", + "textEditor": true + } + ], + "thresholds": "1,5", + "timeFrom": "1m", + "timeShift": null, + "title": "Health", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "WARN", + "value": "4" + }, + { + "op": "=", + "text": "ERROR", + "value": "8" + } + ], + "valueName": "current" + }, + { + "clusterName": "MONs", + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": 
"rgba(128, 128, 128, 0.9)", + "ok": "rgb(1,167,1)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": null, + "displayName": "MONs", + "flipCard": false, + "flipTime": 5, + "hideTimeOverride": true, + "id": 10, + "isGrayOnNoData": true, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "minSpan": 1, + "namePrefix": "", + "span": 1, + "targets": [ + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "expr": "count(ceph_mon_election_win)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "total", + "refId": "D", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon), \"max\")),\"total\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "quorum", + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "ceph_mon_quorum_count", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quorum", + "refId": "E", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon_quorum), \"max\")),\"quorum\")", + "textEditor": true, + "valueHandler": "String Threshold", + "warn": "1" + }, + { + "aggregation": "Last", + "alias": "down", + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_mon_quorum_count) - count(ceph_mon_election_win)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "down", + "refId": "A", + "target": "alias(diffSeries(#D,#E), \"down\")", + "targetFull": "alias(diffSeries(alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon), 
\"max\")),\"total\"),alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon_quorum), \"max\")),\"quorum\")), \"down\")", + "textEditor": true, + "valueHandler": "String Threshold", + "warn": "1" + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "", + "type": "vonage-status-panel" + }, + { + "clusterName": "OSDs", + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgb(1,167,1)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": null, + "displayName": "OSDs", + "flipCard": false, + "flipTime": 5, + "hideTimeOverride": true, + "id": 11, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [ + { + "dashUri": "db/ceph-osd-information", + "dashboard": "Ceph OSD Information", + "targetBlank": true, + "title": "Ceph OSD Information", + "type": "dashboard" + } + ], + "minSpan": 1, + "namePrefix": "", + "span": 1, + "targets": [ + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "expr": "count(ceph_osd_up)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "total", + "refId": "E", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd), \"max\")),\"total\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "in", + "displayType": "Regular", + "expr": "count(ceph_osd_up == 1)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "up", + "refId": "F", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_up), \"max\")),\"up\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "up", + "displayType": "Regular", + 
"expr": "count(ceph_osd_in == 1)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "in", + "refId": "A", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_in), \"max\")),\"in\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "out", + "displayType": "Regular", + "expr": "count(ceph_osd_in == 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "out", + "refId": "C", + "target": "alias(countSeries(currentBelow(groupByNode(transformNull(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_state.*.in,99),-2,\"maxSeries\"),0.5)),\"out\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "down", + "crit": 5, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_osd_up == 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "down", + "refId": "B", + "target": "alias(countSeries(currentBelow(groupByNode(transformNull(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_state.*.up,99),-2,\"maxSeries\"),0.5)),\"down\")", + "textEditor": true, + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "", + "type": "vonage-status-panel" + }, + { + "clusterName": "OSD Hosts", + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgb(1,167,1)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": null, + "displayName": "OSD Hosts", + "flipCard": false, + "flipTime": 5, + "hideTimeOverride": true, + "id": 12, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "minSpan": 1, + "span": 1, + "targets": [ + { + "aggregation": "Last", + 
"alias": "total", + "displayType": "Regular", + "expr": "count(\n count(\n ceph_disk_occupation\n ) by (instance)\n)", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "total", + "refId": "A", + "target": "alias(countSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.num_osds),\"total\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "up", + "displayType": "Regular", + "expr": "count(\n count(\n ceph_osd_up + on (ceph_daemon) group_left(instance) ceph_disk_occupation\n ) by (instance) > 0\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "up", + "refId": "B", + "target": "alias(countSeries(currentAbove(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.num_osds,0)),\"up\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "down", + "crit": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(\n count(\n ceph_osd_up + on (ceph_daemon) group_left(instance) ceph_disk_occupation\n ) by (instance) == 0\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "down", + "refId": "C", + "target": "alias(diffSeries(#A, #B), \"down\")", + "targetFull": "alias(diffSeries(alias(countSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.num_osds),\"total\"), alias(countSeries(currentAbove(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.num_osds,0)),\"up\")), \"down\")", + "textEditor": true, + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "timeFrom": "30s", + "timeShift": null, + "title": "", + "type": "vonage-status-panel" + }, + { + "clusterName": "RGWs", + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgb(1,167,1)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": null, + 
"displayName": "RGWs", + "flipCard": false, + "flipTime": 5, + "hideTimeOverride": true, + "id": 13, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "span": 1, + "targets": [ + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "hide": true, + "refId": "A", + "target": "constantLine(0)", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "displayType": "Regular", + "hide": true, + "refId": "B", + "target": "countSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put)", + "textEditor": true, + "valueHandler": "Regular" + }, + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "hide": false, + "refId": "C", + "target": "alias(limit(sortByMaxima(group(#A,#B)),1),\"total\")", + "targetFull": "alias(limit(sortByMaxima(group(constantLine(0),countSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put))),1),\"total\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "displayType": "Regular", + "hide": true, + "refId": "D", + "target": "countSeries(currentAbove(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0))", + "textEditor": true, + "valueHandler": "Regular" + }, + { + "aggregation": "Last", + "alias": "up", + "displayType": "Regular", + "hide": false, + "refId": "E", + "target": "alias(limit(sortByMaxima(group(#A,#D)),1), \"up\")", + "targetFull": "alias(limit(sortByMaxima(group(constantLine(0),countSeries(currentAbove(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0)))),1), \"up\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "down", + "crit": 6, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "hide": false, + "refId": "F", + "target": "alias(diffSeries(#B, #D),\"down\")", + "targetFull": 
"alias(diffSeries(countSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put), countSeries(currentAbove(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0))),\"down\")", + "textEditor": true, + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "timeFrom": "30s", + "timeShift": null, + "title": "", + "transparent": false, + "type": "vonage-status-panel" + }, + { + "clusterName": "MDS", + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgb(1,167,1)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": null, + "displayName": "MDS", + "flipCard": false, + "flipTime": 5, + "hideTimeOverride": true, + "id": 14, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "minSpan": 1, + "namePrefix": "", + "span": 1, + "targets": [ + { + "aggregation": "Last", + "alias": "up", + "displayType": "Regular", + "expr": "", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "up", + "refId": "A", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mds_up), \"max\")),\"up\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "in", + "displayType": "Regular", + "expr": "", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "E", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mds_in), \"max\")),\"in\")", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "down", + "crit": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "B", + "target": 
"alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mds_failed), \"max\")),\"down\")", + "textEditor": true, + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "", + "type": "vonage-status-panel" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(1, 167, 1, 1)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "hideTimeOverride": true, + "id": 15, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "params": "panelId=3&fullscreen&orgId=1", + "targetBlank": true, + "title": "Cluster Capacity Information", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "%", + "postfixFontSize": "40%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes_used) / sum(ceph_osd_stat_bytes) * 100", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 2, + "refId": "A", + "target": "alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 1,\"maxSeries\")), \"raw capacity\")", + "textEditor": true + } + ], + "thresholds": "70,90", + "timeFrom": "1m", + "timeShift": null, + "title": "Capacity 
Utilization", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "description": "Shows the growth rate based on osd usage over the past $growth_window.", + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 16, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "topk(1, ceph_cluster_total_used_bytes offset 1d) - ignoring (instance,job) topk(1, ceph_cluster_total_used_bytes offset 7d)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "$growth_window Growth Rate", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "Shows the estimated number of weeks left, based on 
consumption over the past $growth_window.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + }, + { + "from": "-99999", + "text": "N/A", + "to": "0" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "refId": "A", + "target": "alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), \"max\"),\"-$growth_window ago\")", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "refId": "B", + "target": "alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\")", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "refId": "C", + "target": "keepLastValue(diffSeries(#B, #A))", + "targetFull": "keepLastValue(diffSeries(alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\"), alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), \"max\"),\"-$growth_window ago\")))", + "textEditor": true + }, + { + "expr": "", + 
"format": "time_series", + "hide": true, + "intervalFactor": 2, + "refId": "D", + "target": "consolidateBy(minSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_avail), \"min\")", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "E", + "target": "alias(divideSeries(#D, #C), 'Weeks Left till full')", + "targetFull": "alias(divideSeries(consolidateBy(minSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_avail), \"min\"), keepLastValue(diffSeries(alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\"), alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), \"max\"),\"-$growth_window ago\")))), 'Weeks Left till full')", + "textEditor": true + } + ], + "thresholds": "", + "title": "Weeks Remaining", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "active + clean": "#01a701", + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_active_clean": "#01a701", + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_peering": "#ffa500", + "peering": "#0A50A1" + }, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": "" + }, + "datasource": null, + "fontSize": "100%", + "format": "none", + "height": "", + "hideTimeOverride": true, + "id": 18, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "includeVars": false, + "keepTime": false, + "targetBlank": true, + "title": "Ceph Cluster Information", + "type": "dashboard" + } + ], + "maxDataPoints": "1", + "minSpan": 2, + "nullPointMode": 
"connected", + "pieType": "pie", + "span": 2, + "strokeWidth": "", + "targets": [ + { + "expr": "ceph_pg_clean", + "format": "time_series", + "hide": true, + "instant": false, + "intervalFactor": 1, + "legendFormat": "clean", + "refId": "A", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean), \"max\")),\"active + clean\")", + "textEditor": true + }, + { + "expr": "ceph_pg_active", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "active", + "refId": "B", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active), \"max\")),\"active\")", + "textEditor": true + }, + { + "expr": "ceph_pg_degraded", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "degraded", + "refId": "C", + "target": "alias(diffSeries(#B,#A),\"active + degraded\")", + "targetFull": "alias(diffSeries(alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active), \"max\")),\"active\"),alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean), \"max\")),\"active + clean\")),\"active + degraded\")", + "textEditor": true + }, + { + "expr": "ceph_pg_peering", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "peering", + "refId": "D", + "target": "alias(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_peering), \"max\"),\"peering\")", + "textEditor": true + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "Placement Group Status", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": 
true, + "title": "At a Glance", + "titleSize": "h5" + }, + { + "collapse": false, + "height": "230", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(251,251,251, 0.97)", + "rgba(255,0,0,1)", + "rgba(255, 0, 0, 1)" + ], + "datasource": null, + "description": "This panel indicates whether scrub/deep scrub is running within the cluster. NB. If either of these features is turned off, the cluster will enter a WARN state. Click on the panel or the link below to look at cluster information in more detail", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 19, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "includeVars": false, + "keepTime": false, + "targetBlank": true, + "title": "Ceph Cluster", + "type": "dashboard" + } + ], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "0", + "text": "INACTIVE", + "to": "0" + }, + { + "from": "1", + "text": "ACTIVE", + "to": "99999" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "ceph_pg_scrubbing{job=\"ceph\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "A", + "target": "consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.deep_scrub),\"max\")", + "textEditor": true + } + ], + "thresholds": "1", + "title": "Scrub", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { +
"op": "=", + "text": "INACTIVE", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "", + "value": "" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(251,251,251, 0.97)", + "rgba(255,165,0, 0.89)", + "rgba(255, 0, 0, 1)" + ], + "datasource": null, + "decimals": 1, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 20, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "/s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(ceph_osd_recovery_ops[1m]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "target": "sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.recovering_bytes_per_sec,-2,\"avg\"))", + "textEditor": true + } + ], + "thresholds": "1,2", + "title": "Recovery Ops", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + 
"rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 21, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "expr": "sum(\n rate(ceph_pool_wr[$__interval])\n)\n+ \nsum(\n rate(ceph_pool_rd[$__interval])\n)", + "format": "time_series", + "groupBy": [], + "hide": false, + "intervalFactor": 1, + "legendFormat": "IOPS", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.op_per_sec,-2,\"maxSeries\"))", + "textEditor": true + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": 
"=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 22, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "/s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "expr": "sum(rate(ceph_pool_wr_bytes[$__interval]) + rate(ceph_pool_rd_bytes[$__interval]))", + "format": "time_series", + "groupBy": [], + "hide": false, + "intervalFactor": 1, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": 
"sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.bytes_sec,-2,\"maxSeries\"))", + "textEditor": true + } + ], + "thresholds": "", + "title": "Client Throughput", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "expr": "count(ceph_pool_metadata)", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_pool)", + "textEditor": true + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + 
"text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 24, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "B", + "target": "alias(sumSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_rbds,6),\"max\")),\"# rbds\")", + "textEditor": true + } + ], + "thresholds": "", + "title": "RBDs", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [], + "valueName": "current" + }, + { + "aliasColors": { + "Reads": "#01a701", + "Writes": "#82B5D8" + }, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "description": "Shows the read/write threshold of client IOPS serviced by the ceph cluster", + "fontSize": "80%", + "format": "none", + "height": "230", + "id": 25, + "interval": null, + "legend": { + "percentage": false, + "show": false, + "values": false + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "90", + "minSpan": 
2, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": 1, + "targets": [ + { + "expr": "round(sum(irate(ceph_pool_rd[30s])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reads", + "refId": "A", + "target": "alias(sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.read_op_per_sec,-2,\"maxSeries\")), \"Reads\")", + "textEditor": true + }, + { + "expr": "round(sum(irate(ceph_pool_wr[30s])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "writes", + "refId": "B", + "target": "alias(sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.write_op_per_sec,-2,\"maxSeries\")), \"Writes\")", + "textEditor": true + } + ], + "title": "Client Read/Write Ratio", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + "95%ile Commit Latency": "#447EBC", + "Apply Latency Max": "#890F02", + "Commit Latency": "#447EBC" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the OSD apply and commit latency at the $percentile%ile across the cluster over the past 15 minutes", + "fill": 0, + "hideTimeOverride": true, + "id": 26, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "dashUri": "db/ceph-osd-information", + "dashboard": "Ceph OSD Information", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Information", + "type": "dashboard" + } + ], + "minSpan": 2, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile / 100, 
ceph_osd_commit_latency_ms)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "commit", + "refId": "A", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.apply_latency,$percentile), \"Apply Latency\")", + "textEditor": true + }, + { + "expr": "quantile($percentile / 100, ceph_osd_apply_latency_ms)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "apply", + "refId": "B", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.commitcycle_latency, $percentile), \"Commit Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": "15m", + "timeShift": null, + "title": "OSD Apply vs Commit Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Performance", + "titleSize": "h5" + }, + { + "collapse": false, + "height": "230px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": 0, + "description": "CPU usage is presented based on the $percentile%ile across all OSD hosts", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": 
"range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "quantile(\n $percentile / 100, (\n avg (\n irate(\n node_cpu{mode=~\"system|user|iowait\", instance=~'[[osd_servers]]'}[1m]\n )\n ) by (instance)\n ) * 100\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "target": "percentileOfSeries(groupByNode(collectd.$osd_servers.$domain.cpu.percent.{system,user,wait},1,\"sumSeries\"),$percentile)", + "textEditor": true + } + ], + "thresholds": "70,90", + "title": "OSD Hosts CPU Busy", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "Total IOPS from all OSDs in the cluster", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 28, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + 
"minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "expr": "sum(\n sum(\n rate(\n node_disk_reads_completed{instance=~\"[[osd_servers]]\"}[$__interval]\n )\n ) by (instance,device)\n)\n+\nsum(\n sum(\n rate(\n node_disk_writes_completed{instance=~\"[[osd_servers]]\"}[$__interval]\n )\n ) by (instance,device)\n)", + "format": "time_series", + "groupBy": [], + "hide": false, + "intervalFactor": 1, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"IOPS\")", + "textEditor": true + } + ], + "thresholds": "", + "title": "Disk IOPS", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 29, + "interval": null, + "links": 
[ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "/s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "expr": "# should only include OSD hosts\nsum(\n sum(\n rate(\n node_disk_bytes_read[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)\n+\nsum(\n sum(\n rate(\n node_disk_bytes_written[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)", + "format": "time_series", + "groupBy": [], + "hide": false, + "intervalFactor": 1, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.bytes_per_sec)", + "textEditor": true + }, + { + "dsType": "influxdb", + "expr": "# will include non-OSD hosts\nsum(\n rate(\n node_disk_bytes_read[$__interval]\n ) + \n 
rate(\n node_disk_bytes_written[$__interval]\n )\n)", + "format": "time_series", + "groupBy": [], + "hide": true, + "intervalFactor": 1, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.bytes_per_sec)", + "textEditor": true + } + ], + "thresholds": "", + "title": "Disk Throughput", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "The count of the number of disks in the cluster that are over $disk_full_threshold% full.", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 30, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + 
"rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(\n (ceph_osd_stat_bytes_used / ceph_osd_stat_bytes) > ($disk_full_threshold / 100)\n)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "target": "countSeries(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.osd_percent_used),0),$disk_full_threshold))", + "textEditor": true + } + ], + "thresholds": "", + "title": "Nearly Full Disks", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "RAM Usage shows the $percentile%ile of RAM used across all OSD hosts", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 31, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { 
+ "fillColor": "rgba(46, 161, 15, 0)", + "full": false, + "lineColor": "rgb(164, 139, 4)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "expr": "quantile(\n $percentile/100, \n (\n node_memory_MemTotal{job='node'} - node_memory_MemFree{job='node'} - node_memory_Buffers{job='node'} - node_memory_Cached{job='node'}\n ) / node_memory_MemTotal{job='node'} * 100\n)", + "format": "time_series", + "groupBy": [], + "hide": false, + "intervalFactor": 2, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "percentileOfSeries(collectd.$osd_servers.$domain.memory.percent.used,$percentile)", + "textEditor": true + } + ], + "thresholds": "", + "title": "RAM Util.", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(1, 167, 1,1)", + "rgba(255,165,0,1)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": 0, + "description": "This panel shows the $percentile%ile disk await time across all OSDs in the cluster", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 32, + "interval": null, + "links": [ + { + "dashUri": "db/latency-by-server", + "dashboard": "Latency by Server", + "includeVars": true, + "targetBlank": true, + "title": "Latency by Server", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { +
"name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": " ms", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "quantile(\n $percentile / 100,\n (\n irate(node_disk_read_time_ms[30s]) / irate(node_disk_reads_completed[30s])\n +\n irate(node_disk_write_time_ms[30s]) / irate(node_disk_writes_completed[30s])\n + \n ignoring(ceph_daemon,job) ceph_disk_occupation\n )\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "C" + }, + { + "expr": "max(\n irate(node_disk_read_time_ms[30s]) / irate(node_disk_reads_completed[30s])\n +\n irate(node_disk_write_time_ms[30s]) / irate(node_disk_writes_completed[30s])\n + ignoring(ceph_daemon,job) ceph_disk_occupation\n)", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "refId": "B" + } + ], + "thresholds": "20,60", + "title": "Disk Latency", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "average %util": "#1f78c1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Chart shows the disk utilization over the past 15 mins expressed as an average across all OSDs, and at the $percentile%ile.", + "fill": 1, + "hideTimeOverride": true, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "dashUri": 
"db/disk-busy-by-server", + "dashboard": "Disk Busy by Server", + "includeVars": true, + "targetBlank": true, + "title": "Disk Busy by Server", + "type": "dashboard" + } + ], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "%util @ 95%ile", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg (\n max by (device) (\n irate(node_disk_io_time_ms[5m]) \n and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) / 10\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "average", + "refId": "A", + "target": "alias(averageSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util), 'average %util')", + "textEditor": false + }, + { + "expr": "quantile(\n $percentile/100, \n (\n max by (device) (\n irate(node_disk_io_time_ms[5m]) \n and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) / 10\n )\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "$percentile%ile", + "refId": "B", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util, $percentile, 'false'), '%util @ $percentile%ile')" + } + ], + "thresholds": [], + "timeFrom": "15m", + "timeShift": null, + "title": "Disk Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard 
Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "85", + "value": "85" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,85,90,95,98", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "85", + "value": "85" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "disk_full_threshold", + "options": [ + { + "selected": true, + "text": "85", + "value": "85" + } + ], + "query": "85", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "7d", + "value": "7d" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "growth_window", + "options": [ + { + "selected": true, + "text": "7d", + "value": "7d" + } + ], + "query": "7d", + "type": "custom" + }, + { + "allValue": "", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": "OSD Host", + "multi": false, + "name": "osd_servers", + "options": [], + "query": "label_values(ceph_disk_occupation, instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + 
"12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph - At A Glance" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/ceph-backend-storage.json b/dashboards/mgr-prometheus/ceph-backend-storage.json new file mode 100644 index 0000000..10a0b3a --- /dev/null +++ b/dashboards/mgr-prometheus/ceph-backend-storage.json @@ -0,0 +1,1286 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "4.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "" + }, + { + "id": "heatmap", + "name": "Heatmap", + "type": "panel", + "version": "" + }, + { + "id": "prometheus", + "name": "Local", + "type": "datasource", + "version": "1.0.0" + }, + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "" + }, + { + "id": "table", + "name": "Table", + "type": "panel", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 247, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + 
"postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_osd_up < 0.5)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "OSDs down", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "id": 2, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "% Full", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Host and Disk", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "% Full", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + 
"colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes * 100 > $disk_full_threshold", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ceph_daemon}}", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "title": "Disks Near Full", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "id": 3, + "links": [], + "maxDataPoints": "1", + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 3, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Total OSD Capacity", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "decbytes" + }, + { + "alias": "Host Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}*0 + on (ceph_daemon) group_right(instance) 
ceph_osd_stat_bytes\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 7200, + "textEditor": true + } + ], + "title": "Host OSD Raw Capacity", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "id": 4, + "links": [], + "maxDataPoints": "10", + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 5, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "# OSDs", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "Host Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 600, + "textEditor": true + } + ], + "title": "Host OSD Capacity Breakdown", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": 
"Disk/OSD Host Summary", + "titleSize": "h5" + }, + { + "collapse": false, + "height": "300", + "panels": [ + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateRdYlGn", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": null, + "description": "The heatmap categorizes disk utilization into discrete buckets (e.g util 0-5) and shows the frequency of the number of disks that fall within that range as a color. The color chosen depends on the number of disks in the 'bucket', ranging from green (low) to red (high). Hover over a colored block to show the count of disk utilization observations at that point.", + "heatmap": {}, + "highlightCards": true, + "id": 5, + "links": [], + "minSpan": 6, + "span": 6, + "targets": [ + { + "expr": "irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"} / 10", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "title": "Disk Drive Utilization Heatmap - $osd_servers", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": 180, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "short", + "logBase": 1, + "max": "100", + "min": "0", + "show": true, + "splitFactor": null + }, + "yBucketNumber": null, + "yBucketSize": 5 + }, + { + "aliasColors": { + "Read Throughput": "#629E51", + "Write Throughput": "#E0752D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n irate(node_disk_bytes_read{instance=~\"($osd_servers)\"}[5m]) and on (instance, device) ceph_disk_occupation\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Read throughput", + "refId": "A", + "step": 10, + "textEditor": true + }, + { + "expr": "sum(\n irate(node_disk_bytes_written{instance=~\"($osd_servers)\"}[5m]) and on (instance, device) ceph_disk_occupation\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Write throughput", + "refId": "B", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Throughput - $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Read Latency": "#629E51", + "Write Latency": "#E0752D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/latency-by-server", + "dashboard": "Latency by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Latency by Server", + "type": "dashboard" + } + ], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile/100.0, (\n irate(node_disk_read_time_ms[5m]) / clamp_min(irate(node_disk_reads_completed[5m]), 0.001)\n and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Read latency", + "refId": "A", + "step": 10, + "textEditor": true + }, + { + "expr": "quantile($percentile/100.0, (\n irate(node_disk_write_time_ms[5m]) / clamp_min(irate(node_disk_writes_completed[5m]), 0.001)\n and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Write latency", + "refId": "B", + "step": 10 + } + ], + "thresholds": [ + { + "colorMode": "custom", + "fill": false, + "line": true, + "lineColor": "rgba(178, 0, 0, 0.29)", + "op": "gt", + "value": 50 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Latency - $osd_servers OSDs @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "disk busy %": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/disk-busy-by-server", + "dashboard": "Disk Busy by Server", + "includeVars": true, + "keepTime": true, + 
"targetBlank": true, + "title": "Disk Busy by Server", + "type": "dashboard" + } + ], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile/100, (\n max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) / 10\n))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "disk busy %", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "custom", + "fill": false, + "line": true, + "lineColor": "rgba(178, 0, 0, 0.29)", + "op": "gt", + "value": 80 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Utilization - $osd_servers OSDs at $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "IOPS/spindle": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile/100.0, 
(\n avg by (device) (\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n avg by (device) (\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "IOPS/spindle", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS per Disk @ $percentile%ile - $osd_servers OSDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "IOPS": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/iops-by-server", + "dashboard": "IOPS by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "IOPS by Server", + "type": "dashboard" + } + ], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile/100.0, (\n sum(\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n sum(\n irate(node_disk_writes_completed[5m]) 
and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Disk IOPS - $osd_servers OSDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk/OSD Load Summary", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": { + "CPU Busy": "#447EBC", + "CPU Busy @ 95%ile": "#890F02", + "Cluster-wide CPU Busy @ 95%ile": "#890F02", + "Max CPU Busy": "#BF1B00", + "Max CPU Busy - all OSD Hosts": "#BF1B00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 3, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Cluster-wide CPU Busy @ 95%ile", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile / 100.0, (\n sum by (instance) (\n irate(node_cpu{mode=~\"(irq|nice|system|user|iowait)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu[5m])\n )\n) * 100)", + "format": "time_series", + 
"intervalFactor": 2, + "legendFormat": "Cluster-wide CPU Busy @ $percentile%ile", + "refId": "A", + "step": 10, + "textEditor": true + }, + { + "expr": "avg(\n sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\", mode=~\"(irq|nice|system|user|iowait)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\"}[5m])\n )\n) * 100", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Average OSD Host(s) CPU Busy", + "refId": "B", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilization - $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Network load (rx+tx)": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/network-usage-by-node", + "dashboard": "Network Usage by Node", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Network Load by Node", + "type": "dashboard" + } + ], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(node_network_receive_bytes{instance=~\"($osd_servers)\", 
device=~\"(eth|en|bond).*\"}[5m])) + \nsum (irate(node_network_transmit_bytes{instance=~\"($osd_servers)\", device=~\"(eth|en|bond).*\"}[5m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Network load (rx+tx)", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load - $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Host CPU and Network Load", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "85", + "value": "85" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,85,90,95,98", + "type": "custom" + }, + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 0, + "includeAll": true, + "label": "OSD Hostname", + "multi": true, + "name": "osd_servers", + "options": [], + "query": "ceph_disk_occupation", + "refresh": 1, + "regex": "/instance=\"([^\"]*)\"/", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + 
"tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "85", + "value": "85" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "disk_full_threshold", + "options": [ + { + "selected": true, + "text": "85", + "value": "85" + } + ], + "query": "85", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Backend Storage" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/ceph-cluster.json b/dashboards/mgr-prometheus/ceph-cluster.json new file mode 100644 index 0000000..e6704ad --- /dev/null +++ b/dashboards/mgr-prometheus/ceph-cluster.json @@ -0,0 +1,2277 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100", + "panels": [ + { + "content": "", + "id": 1, + "links": [], + "minSpan": 1, + "mode": "markdown", + "span": 1, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + 
"format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "expr": "ceph_mon_quorum_count", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 60, + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "MONs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + 
"nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(sum by (instance) (ceph_disk_occupation))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "textEditor": true + } + ], + "thresholds": "", + "title": "OSD Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "expr": "count(ceph_server_metadata{services=~\".*mds.*\"})", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 60, + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "MDS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_server_metadata{services=~\".*rgw.*\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "thresholds": "", + "title": "RGW Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + 
"rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "thresholds": "", + "title": "iSCSI Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + 
"full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "expr": "count(max by (id) (ceph_osd_metadata))", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "legendFormat": "", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 60, + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "expr": "count(ceph_pool_metadata)", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", 
+ "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 60, + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "ceph_cluster_total_bytes - ceph_cluster_total_used_bytes", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "textEditor": true + } + ], + "thresholds": "", + "title": "Unused Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Configuration", + "titleSize": "h5" + }, + { + "collapse": false, + "height": "100", + "panels": [ + { 
+ "content": "", + "id": 10, + "links": [], + "minSpan": 2, + "mode": "markdown", + "span": 2, + "title": "", + "transparent": true, + "type": "text" + }, + { + "content": "

Cluster Flags:

", + "height": "95", + "id": 11, + "links": [], + "minSpan": 1, + "mode": "html", + "span": 1, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "Scrub activity takes place daily basis across the OSD's and performs object size and attribute checks. Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_noscrub) + scalar(ceph_pg_scrubbing >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "timeShift": null, + "title": "SCRUB", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + 
"value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "Deep scrub acts on the objects within placement groups (PGs). Objects are read, and checksum's compared to protect against silent bit-rot events. Although a weekly activity, in large clusters is normal to see deep-scrub active on a daily basis. Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 13, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nodeep_scrub) + scalar(ceph_pg_deep >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "DEEP", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": 
"DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "With the recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster to honor the pool's protection schema. Under some circumstances, you may use the norecover flag to prevent automatic recovery.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 14, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_norecover) + scalar(ceph_pg_recovering >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "RECOVERY", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + 
"valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 15, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nobackfill) + scalar(ceph_pg_backfill >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "BACKFILL", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, 
+ "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 16, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_norebalance)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "REBALANCE", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "The OUT flag setting allows the mon's to mark OSD's as out of the configuration when they stop sending hearbeats to the mon's. By marking them OUT, recovery takes place. 
However, for planned maintenance you can set the cluster to noout to disable this behavior.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_noout)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "OUT", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. 
However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 18, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nodown) + scalar(ceph_pg_down >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "DOWN", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Cluster Flags", + "titleSize": "h5" + }, + { + "collapse": false, + "height": "250", + "panels": [ + { + "aliasColors": { + "Raw": "#3F6833", + "Used": "#E0752D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the Capacity 
within the cluster over the past 7 days", + "fill": 1, + "hideTimeOverride": true, + "id": 19, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Used", + "dsType": "influxdb", + "expr": "scalar(ceph_cluster_total_used_bytes)", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "legendFormat": "Used", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 3600, + "tags": [], + "textEditor": true + }, + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "expr": "scalar(ceph_cluster_total_bytes)", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "legendFormat": "Raw", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 3600, + "tags": [], + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": "7d", + "timeShift": null, + "title": "Cluster Capacity - Past 7 Days", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "Raw": "#3F6833", + "Used": "#E0752D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "description": "Shows the Capacity within each pool over the past 7 days", + "fill": 1, + "hideTimeOverride": true, + "id": 20, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Used", + "dsType": "influxdb", + "expr": "max by (name) (\n ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail) * 100 + on (pool_id) group_left (name) ceph_pool_metadata\n)", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "legendFormat": "{{name}}", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 3600, + "tags": [], + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": "7d", + "timeShift": null, + "title": "Pool Capacity - Past 7 Days", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "Raw": "#3F6833", + "Used": "#E0752D" + }, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": null, + "decimals": 2, + "description": "Shows the Capacity within the cluster over the past 7 days", + "fill": 1, + "hideTimeOverride": true, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Used", + "dsType": "influxdb", + "expr": "scalar(ceph_cluster_total_objects)", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "legendFormat": "RADOS Objects", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 3600, + "tags": [], + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": "7d", + "timeShift": null, + "title": "RADOS Object History - Past 7 Days", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "RADOS Object Count", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Capacity", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 243, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + 
"fontSize": "100%", + "hideTimeOverride": true, + "id": 22, + "links": [], + "minSpan": 2, + "pageSize": 10, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "pattern": "Current", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Host|Version", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_server_metadata{services=~\".*mon.*\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{hostname}}|{{version}}", + "refId": "A" + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "MON Versions", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "content": "", + "id": 23, + "links": [], + "minSpan": 1, + "mode": "markdown", + "span": 1, + "title": "", + "transparent": true, + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "hideTimeOverride": true, + "id": 24, + "links": [], + "minSpan": 2, + "pageSize": 10, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + 
"dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "pattern": "Current", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Host|Version", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_server_metadata{services=~\".*osd.*\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{hostname}}|{{version}}", + "refId": "A" + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "OSD Versions", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "content": "", + "id": 25, + "links": [], + "minSpan": 1, + "mode": "markdown", + "span": 1, + "title": "", + "transparent": true, + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "hideTimeOverride": true, + "id": 26, + "links": [], + "minSpan": 2, + "pageSize": 10, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + 
"dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "pattern": "Current", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Host|Version", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_server_metadata{services=~\".*rgw.*\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{hostname}}|{{version}}", + "refId": "A", + "textEditor": true + }, + { + "expr": "absent(ceph_server_metadata{services=~\".*rgw.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "No RGWs found", + "refId": "B" + } + ], + "timeFrom": "2m", + "title": "RGW Versions", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "content": "", + "id": 27, + "links": [], + "minSpan": 1, + "mode": "markdown", + "span": 1, + "title": "", + "transparent": true, + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "hideTimeOverride": true, + "id": 28, + "links": [], + "minSpan": 2, + "pageSize": 10, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 3, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "pattern": 
"Current", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Host", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "", + "format": "table", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "iSCSI Versions", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ceph Version Configuration", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Cluster" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/ceph-health.json b/dashboards/mgr-prometheus/ceph-health.json new file mode 100644 index 0000000..8b7a783 --- /dev/null +++ b/dashboards/mgr-prometheus/ceph-health.json @@ -0,0 +1,1859 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": null, + "enable": true, + "hide": false, 
+ "iconColor": "rgba(255, 96, 96, 1)", + "limit": 100, + "name": "health_alert", + "showIn": 0, + "tags": "health_alert", + "type": "alert" + }, + { + "datasource": null, + "enable": true, + "hide": false, + "iconColor": "rgb(1, 195, 0)", + "limit": 100, + "name": "health_ok", + "showIn": 0, + "tags": "health_ok", + "type": "alert" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "70", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(1, 167, 1, 1)", + "rgba(255,165,0, 1)", + "rgba(255, 0, 0, 1)" + ], + "datasource": null, + "description": "Shows the overall health of the ceph cluster. To see specific health messages hover over the annotation in the health chart below.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "70", + "hideTimeOverride": true, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "0", + "text": "HEALTH OK", + "to": "1" + }, + { + "from": "1", + "text": "HEALTH WARNING", + "to": "4" + }, + { + "from": "5", + "text": "HEALTH ERROR", + "to": "99" + }, + { + "from": "-10", + "text": "NODATA", + "to": "0" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + 
"lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "ceph_health_status", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,5", + "timeFrom": "1m", + "timeShift": null, + "title": "", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "HEALTH OK", + "value": "0" + }, + { + "op": "=", + "text": "HEALTH WARN", + "value": "1" + }, + { + "op": "=", + "text": "HEALTH ERROR", + "value": "2" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "350", + "panels": [ + { + "aliasColors": { + "Ceph Health": "#3F6833", + "Ceph Health (0:OK, 4:Warning,8:Error)": "#DEDAF7", + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.health": "#DEDAF7" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "The chart plots the clusters health, over time. Health is depicted as a integer; 0, 1 or 2 where 0 is OK, 1 is WARN and 2 represents an ERROR state. Horizontal thresholds (drawn in yellow and red) indicate when the cluster's health transitions to 'warning' or 'error'. 
Annotations describing the details of the clusters health may also be shown if the \"EventURL\" setting is defined to the collectd plugin on the mon hosts.", + "fill": 1, + "height": "350", + "hideTimeOverride": true, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "maxDataPoints": "", + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "ceph_health_status", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "Ceph Health", + "refId": "A", + "step": 600, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "custom", + "fill": false, + "fillColor": "rgba(222, 226, 0, 0.47)", + "line": true, + "lineColor": "rgb(247, 172, 0)", + "op": "gt", + "value": 1 + }, + { + "colorMode": "custom", + "fill": false, + "fillColor": "rgba(246, 3, 3, 0.5)", + "line": true, + "lineColor": "rgb(203, 0, 0)", + "op": "gt", + "value": 2 + } + ], + "timeFrom": "3d", + "timeShift": null, + "title": "Health History - Last 3 days", + "tooltip": { + "shared": false, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "2", + "min": "0", + "show": false + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Health", + "titleSize": "h5" + }, + { + "collapse": false, + "height": "200", + "panels": [ + { + "content": "", + "id": 
3, + "links": [], + "minSpan": 1, + "mode": "html", + "span": 1, + "title": "", + "transparent": true, + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "hideTimeOverride": true, + "id": 4, + "links": [], + "maxDataPoints": "", + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Hostname", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "sanitize": false, + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Status", + "colorMode": "row", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(0, 169, 3, 0.89)", + "rgba(249, 190, 0, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [ + "0", + "3", + "7" + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "TODO", + "refId": "B", + "textEditor": true + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "Monitor Status", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "content": "", + "id": 5, + "links": [], + "minSpan": 2, + "mode": "html", + "span": 2, + "title": "", + "transparent": true, + "type": "text" + }, + { + "content": "

Cluster Flags:

", + "id": 6, + "links": [], + "minSpan": 1, + "mode": "html", + "span": 1, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "Scrub activity takes place daily basis across the OSD's and performs object size and attribute checks. Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_noscrub) + scalar(ceph_pg_scrubbing >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "SCRUB", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" 
+ }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "Deep scrub acts on the objects within placement groups (PGs). Objects are read, and checksum's compared to protect against silent bit-rot events. Although a weekly activity, in large clusters is normal to see deep-scrub active on a daily basis. Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nodeep_scrub) + scalar(ceph_pg_deep >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "DEEP", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": 
"current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "With the recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster to honor the pool's protection schema. Under some circumstances, you may use the norecover flag to prevent automatic recovery.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_norecover) + scalar(ceph_pg_recovering >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "RECOVERY", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + 
"colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 10, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nobackfill) + scalar(ceph_pg_backfill >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "BACKFILL", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + 
}, + "height": "95", + "hideTimeOverride": true, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_norebalance)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "REBALANCE", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "The OUT flag setting allows the mon's to mark OSD's as out of the configuration when they stop sending hearbeats to the mon's. By marking them OUT, recovery takes place. 
However, for planned maintenance you can set the cluster to noout to disable this behavior.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_noout)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "timeShift": null, + "title": "OUT", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. 
However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "95", + "hideTimeOverride": true, + "id": 13, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nodown) + scalar(ceph_pg_down >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "timeShift": null, + "title": "DOWN", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster State", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "id": 14, + "links": [], + "minSpan": 1, + 
"pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 1, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "% Full", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "hidden", + "unit": "none" + }, + { + "alias": "Host.OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes * 100 > $disk_full_threshold", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ceph_daemon}}", + "refId": "A", + "step": 60, + "textEditor": true + } + ], + "title": "Disks Near Full", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "hideTimeOverride": true, + "id": 15, + "links": [], + "minSpan": 1, + "pageSize": 100, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 1, + "styles": [ + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Metric", + "sanitize": false, 
+ "thresholds": [ + "" + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Status", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [ + "0" + ], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_osd_up < 0.5", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ceph_daemon}}", + "refId": "B", + "step": 2, + "textEditor": true + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "OSD's Down", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "hideTimeOverride": true, + "id": 16, + "links": [], + "minSpan": 1, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 1, + "styles": [ + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_osd_in < 0.5", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ceph_daemon}}", + "refId": "A", + "step": 4 + } + ], + "timeFrom": "5m", + "timeShift": null, + "title": "OSDs Out", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "content": "", + "id": 17, + "links": [], + "minSpan": 1, + "mode": "html", + "span": 1, + "title": "", 
+ "transparent": true, + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "filterNull": false, + "fontSize": "100%", + "id": 18, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 4, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Object State", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Count", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "dsType": "influxdb", + "expr": "ceph_cluster_total_objects", + "format": "time_series", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "intervalFactor": 2, + "legendFormat": "Total Objects", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "step": 20, + "tags": [], + "textEditor": true + }, + { + "alias": "Objects", + "dsType": "influxdb", + "expr": 
"ceph_pg_degraded", + "format": "time_series", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "intervalFactor": 2, + "legendFormat": "Objects degraded (CHECK)", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "metric": "", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "step": 20, + "tags": [], + "textEditor": true + }, + { + "alias": "Objects", + "dsType": "influxdb", + "expr": "ceph_pg_remapped", + "format": "time_series", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "intervalFactor": 2, + "legendFormat": "Objects remapped (CHECK)", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "step": 20, + "tags": [], + "textEditor": true + }, + { + "alias": "Objects", + "dsType": "influxdb", + "expr": "ceph_pg_incomplete", + "format": "time_series", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "intervalFactor": 2, + "legendFormat": "Objects incomplete (CHECK)", + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "step": 20, + "tags": [], + "textEditor": true + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" 
+ }, + { + "content": "", + "id": 19, + "links": [], + "minSpan": 1, + "mode": "markdown", + "span": 1, + "title": "", + "transparent": true, + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "filterNull": false, + "fontSize": "100%", + "id": 20, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "PG State", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Count", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "expr": "ceph_pg_total", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "PGs", + "refId": "A", + "step": 20 + }, + { + "expr": "ceph_pg_active", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "PGs Active", + "refId": "B", + "step": 20 + }, + { + "expr": "ceph_pg_clean", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "PGs Active+clean", + "refId": "C", + "step": 20 + }, + { + "expr": "ceph_pg_peering", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "PGs peering", + "refId": "D", + "step": 20 + } + ], 
+ "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage Information", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "reesi001", + "value": "reesi001" + }, + "datasource": null, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "osd_servers", + "options": [], + "query": "ceph_disk_occupation", + "refresh": 1, + "regex": "/instance=\"([^\"]*)\"/", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "85", + "value": "85" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "disk_full_threshold", + "options": [ + { + "selected": true, + "text": "85", + "value": "85" + } + ], + "query": "85", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Health" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/ceph-osd-information.json b/dashboards/mgr-prometheus/ceph-osd-information.json new file mode 100644 index 0000000..b5466b2 --- /dev/null +++ b/dashboards/mgr-prometheus/ceph-osd-information.json @@ -0,0 +1,2189 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + 
"graphTooltip": 0, + "hideControls": true, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "220px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "hideTimeOverride": true, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(max by (id) (ceph_osd_metadata))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + 
"thresholdMarkers": true + }, + "hideTimeOverride": true, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_osd_up > 0.5)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "OSDs UP", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(251,251,251,0.97)", + "rgba(255,165,0, 1)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "hideTimeOverride": true, + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": 
"rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_osd_up < 0.5)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": "1,3", + "timeFrom": "1m", + "title": "OSDs DOWN", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "description": "The pie chart shows the various OSD sizes used within the cluster", + "fontSize": "80%", + "format": "none", + "height": "220", + "hideTimeOverride": true, + "id": 4, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "sortDesc": true, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": "", + "minSpan": 3, + "nullPointMode": "connected", + "pieType": "pie", + "span": 3, + "strokeWidth": "1", + "targets": [ + { + "expr": "count(ceph_osd_stat_bytes < 1099511627776)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<1 TB", + "refId": "A", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<2 TB", + "refId": "B", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<3TB", + "refId": "C", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<4TB", + "refId": "D", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<6TB", + "refId": "E", + "step": 2 + }, + { + "expr": 
"count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<8TB", + "refId": "F", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<10TB", + "refId": "G", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<12TB", + "refId": "H", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "12TB+", + "refId": "I", + "step": 2 + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "OSD Size Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "hideTimeOverride": true, + "id": 5, + "links": [], + "maxDataPoints": "", + "minSpan": 2, + "pageSize": 50, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Size", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "decbytes" + }, + { + "alias": "Hostname|OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + 
"rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum by (instance, ceph_daemon) (\n ceph_disk_occupation*0 + on (ceph_daemon) group_right(instance) ceph_osd_stat_bytes\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}|{{ceph_daemon}}", + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "OSD Size", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "aliasColors": { + "Non-Encrypted": "#E5AC0E" + }, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "fontSize": "80%", + "format": "none", + "height": "200px", + "hideTimeOverride": true, + "id": 6, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "1", + "minSpan": 2, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": 1, + "targets": [ + { + "expr": "", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "refId": "C", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "D", + "targetFull": "alias(countSeries(currentAbove(currentAbove(transformNull(keepLastValue(collectd.*.cephmetrics.gauge.$cluster_name.osd.*.encrypted),-1),0),0.5)),\"Encrypted\")", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "E", + "targetFull": "alias(countSeries(currentBelow(currentAbove(transformNull(keepLastValue(collectd.*.cephmetrics.gauge.$cluster_name.osd.*.encrypted),-1),0),0.5)),\"Non-Encrypted\")", + "textEditor": true + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "OSD Encryption Summary", + "type": "grafana-piechart-panel", + 
"valueName": "current" + }, + { + "aliasColors": { + "Non-Encrypted": "#E5AC0E" + }, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "fontSize": "80%", + "format": "none", + "height": "200px", + "hideTimeOverride": true, + "id": 7, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "1", + "minSpan": 2, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": 1, + "targets": [ + { + "expr": "count(ceph_bluefs_wal_total_bytes)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "bluestore", + "refId": "A", + "step": 240 + }, + { + "expr": "count(ceph_osd_metadata) - count(ceph_bluefs_wal_total_bytes)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "filestore", + "refId": "B", + "step": 240 + }, + { + "expr": "absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "filestore", + "refId": "C", + "step": 240 + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "Summary of OSD Types", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Summary", + "titleSize": "h5" + }, + { + "collapse": false, + "height": "500", + "panels": [ + { + "content": "

Ceph Filestore I/O Process

\n

\nA write request is first committed to a journal using direct-io (apply). Once this write is complete, the data is persisted to HDD by a second 'buffered' write operation (commit). The commit operation is basically a measure of time taken to perform a syncfs call to flush dirty pages to disk, and is therefore not a time associated with any specific client initiated operation.

The tables on the right show the top 10 OSDs with the highest latencies.\n", + "height": "300", + "id": 8, + "links": [], + "minSpan": 3, + "mode": "html", + "span": 3, + "title": "", + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "description": "Filestore OSDs", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 9, + "links": [], + "minSpan": 1, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 1, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_disk_occupation{ceph_daemon!~\"$bluestore_osd_id\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ceph_daemon}}", + "refId": "A", + 
"step": 2, + "textEditor": true + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "description": "Time spent in the queue for the journal. Excessive times here may indicate OSD tthrottling is happening. In this scenario you should review the OSD specific settings in \"ceph.conf\"; filestore_queue_max_ops or filestore_queue_max_bytes", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 10, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Journal Queue Time", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + ".001", + ".003" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "", + "format": "table", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + 
"value": "current" + } + ], + "datasource": null, + "description": "Time taken for the write request to be safely committed to the journal device", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 11, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Journal Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + "0.01", + "0.1" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "", + "format": "table", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "description": "Apply latency covers the time taken to commit to the journal and complete the transaction", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 12, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + 
}, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Apply Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + "100000", + "500000" + ], + "type": "number", + "unit": "ms" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk(10, ceph_osd_apply_latency_ms{ceph_daemon!~\"$bluestore_osd_id\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "legendFormat": "{{ceph_daemon}}", + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "description": "Commit latency is the time taken for writes to be flushed to disk as part of async kernel activity", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 13, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Commit Latency", + "colorMode": "row", + "colors": [ + 
"rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + "1000", + "3000" + ], + "type": "number", + "unit": "ms" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk(10, ceph_osd_commit_latency_ms{ceph_daemon!~\"$bluestore_osd_id\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "legendFormat": "{{ceph_daemon}}", + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "aliasColors": { + "95%ile Commit Latency": "#447EBC", + "Apply Latency Max": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the latency for a given OSD, allowing you to compare a specific OSD against the $percentile%ile graph. 
Note that when the \"OSD Id\" pull-down shows **ALL**, only the **first** OSD is shown to prevent the graph from being unreadable!", + "fill": 0, + "height": "300px", + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": "", + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Apply Latency Max", + "fill": 0 + }, + { + "alias": "95%ile Apply Latency", + "fill": 2 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "B", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "C", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "D", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filestore Latency for OSD '$osd_id'", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "height": "300px", + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, 
+ "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "B", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "C", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "D", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filestore IO Summary - all OSD's @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Filestore OSD Latencies", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "content": "

Ceph Bluestore I/O Process

\n

\nUnlike filestore, bluestore does not suffer from a double-write penalty (i.e., write to journal then write to HDD). With bluestore, once a write is scheduled (submit and throttle latencies), it is done directly to the disk (AIO wait), and then the metadata relating to the object is changed (kv_commit). Writes are not considered complete until the kv store is updated.

The tables on the right focus on the top 10 Bluestore OSDs with the highest latencies.\n", + "height": "300", + "id": 16, + "links": [], + "minSpan": 3, + "mode": "html", + "span": 3, + "title": "", + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "description": "Bluestore OSDs", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 17, + "links": [], + "minSpan": 1, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 1, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_disk_occupation{ceph_daemon=~\"($bluestore_osd_id)\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ceph_daemon}}", + "refId": 
"A", + "textEditor": true + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "description": "Time spent preparing the request (transaction)", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 18, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Submit Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + ".001", + ".003" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "", + "format": "table", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "description": "Time requests wait due to throttling or busy conditions", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + 
"id": 19, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Throttle Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + ".002", + ".005" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [ + "" + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "", + "format": "table", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "description": "Time spent waiting for the physical I/O request to complete", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 20, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "AIO Wait Time", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + 
"rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + ".020", + ".050" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "", + "format": "table", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "description": "Time spent waiting for rocksdb (metadata store) to commit meta data", + "fontSize": "100%", + "height": "310", + "hideTimeOverride": true, + "id": 21, + "links": [], + "minSpan": 2, + "pageSize": 5, + "scroll": false, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "KV Commit ", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + ".003", + ".005" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + 
"dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "", + "format": "table", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "height": "300", + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "B", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "C", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "D", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Bluestore Latency for OSD '$osd_id'", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + 
"logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "This charts shows the $percentile%ile latencies across all OSDs, which indicates overall performance, but does not represent any specific OSD", + "fill": 1, + "height": "300px", + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "B", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "C", + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "D", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "BlueStore IO Summary - all OSD's @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, 
+ "showTitle": true, + "title": "Bluestore OSD Latencies", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "96", + "value": "96" + }, + { + "selected": false, + "text": "97", + "value": "97" + }, + { + "selected": false, + "text": "98", + "value": "98" + }, + { + "selected": false, + "text": "99", + "value": "99" + } + ], + "query": "95,96,97,98,99", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "10", + "value": "10" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "max_devices", + "options": [ + { + "selected": true, + "text": "10", + "value": "10" + } + ], + "query": "10", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": null, + "hide": 0, + "includeAll": true, + "label": "OSD Id", + "multi": false, + "name": "osd_id", + "options": [], + "query": "ceph_osd_metadata", + "refresh": 1, + "regex": "/id=\"([^\"]*)\"/", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": "OSD Id", + "multi": false, + "name": "osd_id_hidden", + "options": [], + "query": "ceph_osd_metadata", + "refresh": 1, + "regex": "/id=\"([^\"]*)\"/", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": "", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Local", 
+ "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "bluestore_osd_id", + "options": [], + "query": "label_values(ceph_bluefs_bytes_written_wal, ceph_daemon)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph OSD Information" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/ceph-pools.json b/dashboards/mgr-prometheus/ceph-pools.json new file mode 100644 index 0000000..8d5e268 --- /dev/null +++ b/dashboards/mgr-prometheus/ceph-pools.json @@ -0,0 +1,1490 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 5, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": 
"(label_replace((irate(ceph_pool_rd{pool_id=~\"[[pool_id]]\"}[1m]) + irate(ceph_pool_wr{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client IOPS by Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 5, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace((irate(ceph_pool_rd_bytes{pool_id=~\"[[pool_id]]\"}[1m]) + irate(ceph_pool_wr_bytes{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "target": "groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.bytes_sec,-2,\"maxSeries\")", + "textEditor": true + } + ], + 
"thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Throughput by Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Pool Overview", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "id": 3, + "links": [], + "minSpan": 6, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 6, + "desc": true + }, + "span": 4, + "styles": [ + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Time", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "id", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", 
+ "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "job", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Pool Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "name", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Pool ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "pool_id", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS (R+W)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk(5,(label_replace((irate(ceph_pool_rd{pool_id=~\"[[pool_id]]\"}[1m]) + irate(ceph_pool_wr{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) )", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "target": "limit(sortByMaxima(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.op_per_sec,-2,\"maxSeries\")),5)", + "textEditor": true + } + ], + "title": "Top 5 Pools by Client IOPS", + "transform": "table", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "id": 4, + "links": [], + "minSpan": 6, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 6, + "desc": true + }, + "span": 4, + "styles": [ + { + "alias": "Time", 
+ "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "id", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "job", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Pool Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "name", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Pool ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "pool_id", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Throughput", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "bytes" + } + ], + "targets": [ + { + "expr": "(label_replace((irate(ceph_pool_rd_bytes{pool_id=~\"[[pool_id]]\"}[1m]) + irate(ceph_pool_wr_bytes{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", 
\"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "target": "limit(sortByMaxima(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.bytes_sec,-2,\"maxSeries\")),5)", + "textEditor": true + } + ], + "title": "Top 5 Pools by Throughput", + "transform": "table", + "type": "table" + }, + { + "columns": [], + "datasource": null, + "fontSize": "100%", + "id": 5, + "links": [], + "minSpan": 4, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 5, + "desc": true + }, + "span": 4, + "styles": [ + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Time", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "job", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Pool Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "name", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Pool ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], 
+ "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "pool_id", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Capacity Used", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "percentunit" + } + ], + "targets": [ + { + "expr": "topk(5,((ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail)) + on(pool_id) group_left(name) ceph_pool_metadata))", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 2, + "refId": "D" + } + ], + "title": "Top 5 Pools By Capacity Used", + "transform": "table", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 5's", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "data", + "value": "data" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(irate(ceph_pool_rd[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reads", + 
"refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + }, + { + "expr": "(label_replace(irate(ceph_pool_wr[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "writes", + "refId": "C", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "data", + "value": "data" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(irate(ceph_pool_rd_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) 
ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reads", + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "expr": "(label_replace(irate(ceph_pool_wr_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "writes", + "refId": "C", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": "pool_name", + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"scopedVars": { + "pool_name": { + "selected": false, + "text": "djf_tmp", + "value": "djf_tmp" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(irate(ceph_pool_rd[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reads", + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + }, + { + "expr": "(label_replace(irate(ceph_pool_wr[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "writes", + "refId": "C", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null 
as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "djf_tmp", + "value": "djf_tmp" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(irate(ceph_pool_rd_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reads", + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "expr": "(label_replace(irate(ceph_pool_wr_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "writes", + "refId": "C", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1519089276776, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": 
"#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "libvirt-pool", + "value": "libvirt-pool" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(irate(ceph_pool_rd[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reads", + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + }, + { + "expr": "(label_replace(irate(ceph_pool_wr[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "writes", + "refId": "C", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 
null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "libvirt-pool", + "value": "libvirt-pool" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(irate(ceph_pool_rd_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reads", + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "expr": "(label_replace(irate(ceph_pool_wr_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "writes", + "refId": "C", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + 
"max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1519089276776, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "metadata", + "value": "metadata" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(irate(ceph_pool_rd[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reads", + "refId": "B", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + }, + { + "expr": "(label_replace(irate(ceph_pool_wr[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "writes", + "refId": "C", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + 
"timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "pool_name": { + "selected": false, + "text": "metadata", + "value": "metadata" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(irate(ceph_pool_rd_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reads", + "refId": "A", + "target": "aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + }, + { + "expr": "(label_replace(irate(ceph_pool_wr_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "writes", + "refId": "C", + "target": 
"aliasByNode(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec), -1)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": 1519089276776, + "repeatRowId": 3, + "showTitle": true, + "title": "Pool '$pool_name' Performance Details", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "pool_id", + "options": [], + "query": "label_values(ceph_pool_metadata,pool_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": "Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "label_values(ceph_pool_metadata,name)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + 
"1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Pools" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/disk-busy-by-server.json b/dashboards/mgr-prometheus/disk-busy-by-server.json new file mode 100644 index 0000000..6f00c19 --- /dev/null +++ b/dashboards/mgr-prometheus/disk-busy-by-server.json @@ -0,0 +1,1633 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "links": [], + "refresh": "15s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "height": "400", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\"}[30s]) / 10),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{host}}", + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util,1,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Hosts Disk Utilization Peak", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "apama002", + "value": "apama002" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": 
"0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira019", + "value": "mira019" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + 
}, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira021", + "value": "mira021" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira031", + "value": "mira031" + } + }, + "seriesOverrides": [], + 
"spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira049", + "value": "mira049" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira055", + "value": "mira055" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + 
"show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira060", + "value": "mira060" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + 
"lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira070", + "value": "mira070" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira076", + "value": "mira076" + } + }, + "seriesOverrides": [], + "spaceLength": 
10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira087", + "value": "mira087" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, 
+ "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira093", + "value": "mira093" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + 
}, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira099", + "value": "mira099" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, 
+ "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira116", + "value": "mira116" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira120", + "value": "mira120" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, 
+ "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira122", + "value": "mira122" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": 
null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "reesi001", + "value": "reesi001" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "osd_servers", + "options": [], + "query": "label_values(ceph_disk_occupation, instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "96", + "value": "96" + }, + { + "selected": false, + "text": "97", + "value": "97" + }, + { + "selected": false, + "text": "98", + "value": "98" + }, + { + "selected": false, + "text": "99", + "value": "99" + } + ], + "query": "95,96,97,98,99", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Disk Busy By Server" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/iops-by-server.json 
b/dashboards/mgr-prometheus/iops-by-server.json new file mode 100644 index 0000000..ef1c339 --- /dev/null +++ b/dashboards/mgr-prometheus/iops-by-server.json @@ -0,0 +1,1535 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [], + "refresh": "15s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 0 + }, + "height": "400", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]]\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{host}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk IOPS Across All OSD Hosts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All Servers by IOPS", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "apama002", + "value": "apama002" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": 
false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira019", + "value": "mira019" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira021", + "value": "mira021" + } + }, + "seriesOverrides": [], + 
"spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira031", + "value": "mira031" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for 
$osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira049", + "value": "mira049" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + 
"aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira055", + "value": "mira055" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + 
"repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira060", + "value": "mira060" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira070", + "value": "mira070" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + 
irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira076", + "value": "mira076" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + 
"name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira087", + "value": "mira087" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": 
false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira093", + "value": "mira093" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira099", + "value": "mira099" + } + }, + 
"seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira116", + "value": "mira116" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + 
"title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira120", + "value": "mira120" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira122", + "value": "mira122" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"repeat": null, + "repeatIteration": 1519091567712, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "reesi001", + "value": "reesi001" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "IOPS Load by Server", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": "OSD Host", + "multi": false, + "name": "osd_servers", + "options": [], + "query": "label_values(ceph_disk_occupation, instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + 
"time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "IOPS by Server" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/iscsi-overview.json b/dashboards/mgr-prometheus/iscsi-overview.json new file mode 100644 index 0000000..76b930b --- /dev/null +++ b/dashboards/mgr-prometheus/iscsi-overview.json @@ -0,0 +1,1690 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "4.6.3" + }, + { + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "type": "panel", + "version": "1.1.5" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "" + }, + { + "id": "prometheus", + "name": "Prometheus", + "type": "datasource", + "version": "1.0.0" + }, + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "" + }, + { + "id": "table", + "name": "Table", + "type": "panel", + "version": "" + }, + { + "id": "text", + "name": "Text", + "type": "panel", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "links": [], + "refresh": "15s", + "rows": [ + { + "collapse": false, + "height": "150px", + "panels": [ + { + "content": "", + "id": 8, + "links": [], + "minSpan": 2, + "mode": "markdown", + "span": 2, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + 
"links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(ceph_iscsi_gateway_tpg_total)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Gateways", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(count by(instance) (ceph_iscsi_client_login))", + "format": "time_series", + 
"instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Clients", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(sum(ceph_iscsi_client_login) by(gw_name))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Sessions", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Local", + "decimals": 0, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + 
"mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(sum(ceph_iscsi_lun_size_bytes) by(gw_name))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Defined Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Local", + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(count by(instance) (ceph_iscsi_lun_mapped))", + "format": "time_series", + 
"intervalFactor": 2, + "refId": "B" + } + ], + "thresholds": "", + "title": "LUNs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Local", + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(count by(instance) (ceph_iscsi_lun_mapped)) - max(count by(instance) (ceph_iscsi_lun_mapped == 1))", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Unused LUNs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "", + "id": 9, + "links": [], + "minSpan": 3, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "content": "", + "id": 10, + "links": [], + "minSpan": 1, + "mode": "markdown", + "span": 1, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + 
"colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Local", + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "200", + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(sum(rate(ceph_iscsi_lun_iops[30s])) by(gw_name))", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Local", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "200", + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + 
"postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((sum(rate(ceph_iscsi_lun_read_bytes[30s])) by(gw_name)) + (sum(rate(ceph_iscsi_lun_write_bytes[30s])) by(gw_name)))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Throughput", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "height": "200", + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(rate(node_network_transmit_bytes{device=~\"(eth|bon|en).*\",job=\"iscsi_gw\"}[30s]),\"IP\",\"$1\",\"instance\",\"(.*)\\\\:.*\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{IP}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load by Gateway", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + 
"min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "Local", + "fontSize": "80%", + "format": "short", + "id": 14, + "interval": null, + "legend": { + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": 3, + "minSpan": 3, + "nullPointMode": "connected", + "pieType": "pie", + "span": 3, + "strokeWidth": 1, + "targets": [ + { + "expr": "max(count(ceph_iscsi_lun_mapped) by(instance,gw_owner)) by(gw_owner)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{gw_owner}}", + "refId": "A" + } + ], + "title": "Primary LUN Paths by Gateway", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "iSCSI Gateway Group : $gateway_iqn", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum by(IP) (label_replace((rate(ceph_iscsi_lun_iops[30s])),\"IP\",\"$1\",\"instance\",\"(.*)\\\\:.*\")))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{IP}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": 
"IOPS Load by Gateway", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum by(IP) (label_replace((rate(ceph_iscsi_lun_read_bytes[30s])),\"IP\",\"$1\",\"instance\",\"(.*)\\\\:.*\") + (label_replace((rate(ceph_iscsi_lun_write_bytes[30s])),\"IP\",\"$1\",\"instance\",\"(.*)\\\\:.*\")) ))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{IP}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS Throughput by Gateway", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + 
"dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - (avg by (IP) (label_replace(irate(node_cpu{job=\"iscsi_gw\",mode=\"idle\"}[30s]),\"IP\",\"$1\",\"instance\",\"(.*):(.*)\")) * 100)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{IP}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(node_memory_MemAvailable{job=\"iscsi_gw\"},\"IP\",\"$1\",\"instance\",\"(.*):(.*)\")) / 
\n(label_replace(node_memory_MemTotal{job=\"iscsi_gw\"},\"IP\",\"$1\",\"instance\",\"(.*):(.*)\")) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{IP}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "RAM Util%", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Gateway Load", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum by(shortname) (label_replace(rate(ceph_iscsi_lun_iops[30s]),\"shortname\",\"$2\",\"client_iqn\",\"(.*):(.*)\")))", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "{{shortname}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Aggregated IOPS by Client", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, 
+ "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(\n sum by(shortname) \n ((label_replace(rate(ceph_iscsi_lun_read_bytes[30s]),\"shortname\",\"$2\",\"client_iqn\",\"(.*):(.*)\")) +\n (label_replace(rate(ceph_iscsi_lun_write_bytes[30s]),\"shortname\",\"$2\",\"client_iqn\",\"(.*):(.*)\"))\n ))", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "{{shortname}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput by Client", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Client Workload Overview", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + 
"panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum by(lun_name) (irate(ceph_iscsi_lun_iops{client_iqn=~\"[[client_iqn]]\"}[30s])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{lun_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS Detail for $client_iqn", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum by(lun_name) 
(irate(ceph_iscsi_lun_read_bytes{client_iqn=~\"[[client_iqn]]\"}[30s])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{lun_name}}(r)", + "refId": "A" + }, + { + "expr": "round(sum by(lun_name) (irate(ceph_iscsi_lun_write_bytes{client_iqn=~\"[[client_iqn]]\"}[30s])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{lun_name}}(w)", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput for $client_iqn", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "columns": [], + "datasource": "Local", + "fontSize": "100%", + "id": 24, + "links": [], + "minSpan": 6, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 5, + "desc": true + }, + "span": 6, + "styles": [ + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Time", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Client IQN", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "client_iqn", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Path Owner", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": 
"gw_owner", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "LUN Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "lun_name", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Size", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "bytes" + } + ], + "targets": [ + { + "expr": "max by(lun_name,client_iqn) (ceph_iscsi_client_lun{client_iqn=~\"[[client_iqn]]\"}) +\n on(lun_name) group_right(client_iqn) (max(ceph_iscsi_lun_mapped) by(lun_name, gw_owner)) +\n on(lun_name) group_right(client_iqn, gw_owner) max(ceph_iscsi_lun_size_bytes) by(lun_name)\n\n", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "refId": "C" + } + ], + "title": "", + "transform": "table", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Client Details : $client_iqn", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "Local", + "hide": 0, + "includeAll": false, + "label": "Client IQN", + "multi": false, + "name": "client_iqn", + "options": [], + "query": "label_values(ceph_iscsi_client_login,client_iqn)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "gateway_iqn", + "options": [], + "query": 
"label_values(ceph_iscsi_gateway_tpg_total, gw_iqn)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "iSCSI Overview" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/latency-by-server.json b/dashboards/mgr-prometheus/latency-by-server.json new file mode 100644 index 0000000..8c82a17 --- /dev/null +++ b/dashboards/mgr-prometheus/latency-by-server.json @@ -0,0 +1,1569 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [], + "refresh": "15s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / 
\n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All OSD Hosts - Highest Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All OSD Hosts", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira055", + "value": "mira055" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / 
\n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira060", + "value": "mira060" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": 
false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira019", + "value": "mira019" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": 
"graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira021", + "value": "mira021" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira070", + "value": "mira070" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + 
"legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira087", + "value": "mira087" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": 
false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira049", + "value": "mira049" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira099", + "value": "mira099" + 
} + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira031", + "value": "mira031" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + 
irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira116", + "value": "mira116" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + 
irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira076", + "value": "mira076" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira093", + "value": "mira093" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + 
"show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira120", + "value": "mira120" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "apama002", + "value": "apama002" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + 
"show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "mira122", + "value": "mira122" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": 
null, + "repeatIteration": 1519090792110, + "repeatPanelId": 2, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "reesi001", + "value": "reesi001" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Host's Max Disk Latency", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "osd_servers", + "options": [], + "query": 
"label_values(ceph_disk_occupation, instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Latency by Server" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/network-usage-by-node.json b/dashboards/mgr-prometheus/network-usage-by-node.json new file mode 100644 index 0000000..b0979e0 --- /dev/null +++ b/dashboards/mgr-prometheus/network-usage-by-node.json @@ -0,0 +1,399 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [], + "refresh": "15s", + "rows": [ + { + "collapse": false, + "height": "300px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(node_network_receive_bytes{instance=~\"[[mon_servers]]\",device=~\"(eth|en|bond).*\"}[30s])) + \nsum 
(irate(node_network_transmit_bytes{instance=~\"[[mon_servers]]\",device=~\"(eth|en|bond).*\"}[30s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "MONs", + "refId": "A" + }, + { + "expr": "sum (irate(node_network_receive_bytes{instance=~\"[[only_osds]]\",device=~\"(eth|en|bond).*\"}[30s])) + \nsum (irate(node_network_transmit_bytes{instance=~\"[[only_osds]]\",device=~\"(eth|en|bond).*\"}[30s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "OSDs", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Aggregated Network Load", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(node_network_receive_bytes{instance=~\"[[mon_servers]].*\", device=~\"(eth|en|bond).*\"}[30s])) by (instance) + \nsum 
(irate(node_network_transmit_bytes{instance=~\"[[mon_servers]].*\", device=~\"(eth|en|bond).*\"}[30s])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "MON Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "MON Hosts", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(node_network_receive_bytes{instance=~\"[[osd_servers]].*\", device=~\"(eth|en|bond).*\"}[30s])) by (instance) + \nsum (irate(node_network_transmit_bytes{instance=~\"[[osd_servers]].*\", device=~\"(eth|en|bond).*\"}[30s])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Hosts Network Load", + "tooltip": { + "shared": 
true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Hosts", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Local", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "osd_servers", + "options": [], + "query": "label_values(ceph_server_metadata{services=~\".*osd.*\"}, hostname)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Local", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "mon_servers", + "options": [], + "query": "label_values(ceph_server_metadata{services=~\".*mon.*\"}, hostname)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Local", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "only_osds", + "options": [], + "query": "label_values(ceph_server_metadata{services=\"osd\"}, hostname)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": 
"now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Network Usage by Node" +} \ No newline at end of file diff --git a/dashboards/mgr-prometheus/osd-node-detail.json b/dashboards/mgr-prometheus/osd-node-detail.json new file mode 100644 index 0000000..02fbc3d --- /dev/null +++ b/dashboards/mgr-prometheus/osd-node-detail.json @@ -0,0 +1,965 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 125, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "160", + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + 
"fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_disk_occupation{device=~\"($device_id)\", instance=~\"($osd_servers)\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.", + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "160", + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{device=~\"($device_id)\", instance=~\"($osd_servers)\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 40, + 
"textEditor": true + } + ], + "thresholds": "", + "title": "Raw Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [], + "datasource": null, + "description": "", + "fontSize": "100%", + "height": "160", + "hideTimeOverride": false, + "id": 3, + "links": [], + "minSpan": 3, + "pageSize": 1000, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 3, + "styles": [ + { + "alias": "Host | Device | OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "sanitize": false, + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "max(ceph_disk_occupation{instance=~\"($osd_servers)\"}) by (instance, device, ceph_daemon)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} | {{device}} | {{ceph_daemon}}", + "metric": "", + "refId": "A", + "step": 20, + "textEditor": true + } + ], + "timeFrom": null, + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "description": "", + "fontSize": "100%", + "height": "160", + "hideTimeOverride": true, + "id": 4, + "links": [], + "maxDataPoints": "1", + "minSpan": 3, + "pageSize": 1000, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 3, + "styles": [ + { + "alias": "Time", 
+ "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Host", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Disk Size", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "decbytes" + } + ], + "targets": [ + { + "expr": "(\n ceph_disk_occupation{device=~\"($device_id)\", instance=~\"($osd_servers)\"}*0\n + on (ceph_daemon) group_right(instance, device) ceph_osd_stat_bytes\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}.{{device}}", + "refId": "A", + "step": 240, + "textEditor": true + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "'$osd_servers' OSD Overview", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by (device) (\n irate(node_disk_io_time_ms[5m]) and on 
(instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\", device=~\"($device_id)\"}\n) / 10", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk utilisation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "%Util", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by (device) (\n irate(node_disk_reads_completed{device=~\"($device_id)\", instance=~\"($osd_servers)\"}[5m]) +\n irate(node_disk_writes_completed{device=~\"($device_id)\", instance=~\"($osd_servers)\"}[5m])\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + 
"name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by (device) (\n irate(node_disk_write_time_ms{device=~ \"($device_id)\", instance=~\"($osd_servers)\"}[5m])\n /\n clamp_min(irate(node_disk_writes_completed{device=~ \"($device_id)\", instance=~\"($osd_servers)\"}[5m]), 0.001)\n+\n irate(node_disk_read_time_ms{device=~ \"($device_id)\", instance=~\"($osd_servers)\"}[5m])\n /\n clamp_min(irate(node_disk_reads_completed{device=~ \"($device_id)\", instance=~\"($osd_servers)\"}[5m]), 0.001)\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true 
+ } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by (device) (\n irate(node_disk_bytes_read{device=~\"($device_id)\", instance=~\"($osd_servers)\"}[5m]) + \n irate(node_disk_bytes_written{device=~\"($device_id)\", instance=~\"($osd_servers)\"}[5m])\n)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Throughput by Disk", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "interrupt": "#447EBC", + "steal": "#6D1F62", + "system": "#890F02", + "user": "#3F6833", + "wait": "#C15C17" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the CPU breakdown. 
When multiple servers are selected, only the first host's cpu data is shown", + "fill": 3, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($osd_servers)\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[5m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($osd_servers)\"}[5m]))\n) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{mode}}", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers CPU Utilisation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (device) (irate(node_network_receive_bytes{instance=~\"($osd_servers)\", device=~\"(eth|en|bond).*\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}.rx", + "refId": "A", + "step": 10, + "textEditor": true + }, + { + "expr": "sum by (device) (irate(node_network_transmit_bytes{instance=~\"($osd_servers)\", device=~\"(eth|en|bond).*\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}.tx", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "'$osd_servers' Performance Statistics", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "tags": [], + "text": "apama002", + "value": "apama002" + }, + "datasource": null, + "hide": 0, + "includeAll": false, + "label": "OSD Host Name", + "multi": false, + "name": "osd_servers", + "options": [], + "query": "ceph_disk_occupation", + "refresh": 1, + "regex": "/instance=\"([^\"]*)\"/", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + 
"datasource": null, + "hide": 0, + "includeAll": true, + "label": "Disk Name", + "multi": true, + "name": "device_id", + "options": [], + "query": "ceph_disk_occupation", + "refresh": 1, + "regex": "/device=\"([^\"]*)\"/", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "60", + "value": "60" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "max_devices", + "options": [ + { + "selected": true, + "text": "60", + "value": "60" + } + ], + "query": "60", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "OSD Node Detail" +} \ No newline at end of file diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..c102b62 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,23 @@ +version: '2' + +services: + grafana: + image: docker.io/grafana/grafana:latest + container_name: grafana + ports: + - "3000:3000" + volumes: + - /opt/docker/grafana/etc:/etc/grafana:Z + - /opt/docker/grafana/data:/var/lib/grafana:Z + depends_on: + - graphite + graphite: + image: docker.io/abezhenar/graphite-centos7 + container_name: graphite + ports: + - "80:80" + - "2003:2003" + - "2004:2004" + - "7002:7002" + volumes: + - /opt/docker/graphite:/var/lib/graphite/storage/whisper:Z diff --git a/etc/collectd.conf b/etc/collectd.conf new file mode 100644 index 0000000..5c8a26d --- /dev/null +++ b/etc/collectd.conf @@ -0,0 +1,18 @@ +# Hostname "obj-mon-1.storage.lab" +BaseDir "/var/lib/collectd" +PluginDir "/usr/lib64/collectd" + +TypesDB "/usr/share/collectd/types.db" + +Interval 10 
+Timeout 5 +ReadThreads 5 + + + Globals true + + +LoadPlugin "threshold" +LoadPlugin "aggregation" + +Include "/etc/collectd.d/*.conf" diff --git a/etc/collectd.d/cephmetrics.conf b/etc/collectd.d/cephmetrics.conf new file mode 100644 index 0000000..1f5f6ce --- /dev/null +++ b/etc/collectd.d/cephmetrics.conf @@ -0,0 +1,10 @@ +LoadPlugin "python" + + + ModulePath "/usr/lib64/collectd/cephmetrics" + LogTraces true + Import "cephmetrics" + + ClusterName "ceph" + + diff --git a/etc/collectd.d/cpu.conf b/etc/collectd.d/cpu.conf new file mode 100644 index 0000000..bd65ef0 --- /dev/null +++ b/etc/collectd.d/cpu.conf @@ -0,0 +1,17 @@ +LoadPlugin cpu + + + ValuesPercentage true + ReportByCpu false + + + + + + Instance user + FailureMax 90 + WarningMax 80 + + + + diff --git a/etc/collectd.d/memory.conf b/etc/collectd.d/memory.conf new file mode 100644 index 0000000..481950c --- /dev/null +++ b/etc/collectd.d/memory.conf @@ -0,0 +1,26 @@ +LoadPlugin memory + + + ValuesPercentage true + + + + + Plugin "memory" + Type "memory" + + GroupBy "Host" + + CalculateSum true + + + + + + + Instance used + FailureMax 90 + WarningMax 80 + + + diff --git a/etc/collectd.d/nics.conf b/etc/collectd.d/nics.conf new file mode 100644 index 0000000..7494ca1 --- /dev/null +++ b/etc/collectd.d/nics.conf @@ -0,0 +1 @@ +LoadPlugin interface diff --git a/etc/collectd.d/write_graphite.conf b/etc/collectd.d/write_graphite.conf new file mode 100644 index 0000000..b93ea61 --- /dev/null +++ b/etc/collectd.d/write_graphite.conf @@ -0,0 +1,19 @@ +LoadPlugin "write_graphite" + + + Host "graphite.storage.lab" + Port "2003" + Protocol "tcp" +# ReconnectInterval 0 + LogSendErrors true + Prefix "collectd." 
+# Postfix "collectd" + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + PreserveSeparator true + SeparateInstances true +# DropDuplicateFields true + + + diff --git a/etc/grafana/grafana.ini b/etc/grafana/grafana.ini new file mode 100644 index 0000000..e95cdc7 --- /dev/null +++ b/etc/grafana/grafana.ini @@ -0,0 +1,408 @@ +##################### Grafana Configuration Example ##################### +# +# Everything has defaults so you only need to uncomment things you want to +# change + +# possible values : production, development +; app_mode = production + +# instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty +; instance_name = ${HOSTNAME} + +#################################### Paths #################################### +[paths] +# Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) +# +;data = /var/lib/grafana +# +# Directory where grafana can store logs +# +;logs = /var/log/grafana +# +# Directory where grafana will automatically scan and look for plugins +# +;plugins = /var/lib/grafana/plugins + +# +#################################### Server #################################### +[server] +# Protocol (http, https, socket) +;protocol = http + +# The ip address to bind to, empty will bind to all interfaces +;http_addr = + +# The http port to use +;http_port = 3000 + +# The public facing domain name used to access grafana from a browser +;domain = localhost + +# Redirect to correct domain if host header does not match domain +# Prevents DNS rebinding attacks +;enforce_domain = false + +# The full public facing url you use in browser, used for redirects and emails +# If you use reverse proxy and sub path specify full url (with sub path) +;root_url = http://localhost:3000 +root_url = %(protocol)s://%(domain)s:%(http_port)s/ + +# Log web requests +;router_logging = false + +# the path relative working path +;static_root_path = public + +# enable gzip +;enable_gzip = 
false + +# https certs & key file +;cert_file = +;cert_key = + +# Unix socket path +;socket = + +#################################### Database #################################### +[database] +# You can configure the database connection by specifying type, host, name, user and password +# as separate properties or as one string using the url property. + +# Either "mysql", "postgres" or "sqlite3", it's your choice +;type = sqlite3 +;host = 127.0.0.1:3306 +;name = grafana +;user = root +# If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" +;password = + +# Use either URL or the previous fields to configure the database +# Example: mysql://user:secret@host:port/database +;url = + +# For "postgres" only, either "disable", "require" or "verify-full" +;ssl_mode = disable + +# For "sqlite3" only, path relative to data_path setting +;path = grafana.db + +# Max conn setting default is 0 (mean not set) +;max_idle_conn = +;max_open_conn = + + +#################################### Session #################################### +[session] +# Either "memory", "file", "redis", "mysql", "postgres", default is "file" +;provider = file + +# Provider config options +# memory: not have any config yet +# file: session dir path, is relative to grafana data_path +# redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana` +# mysql: go-sql-driver/mysql dsn config string, e.g. 
`user:password@tcp(127.0.0.1:3306)/database_name` +# postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable +;provider_config = sessions + +# Session cookie name +;cookie_name = grafana_sess + +# If you use session in https only, default is false +;cookie_secure = false + +# Session life time, default is 86400 +;session_life_time = 86400 + +#################################### Data proxy ########################### +[dataproxy] + +# This enables data proxy logging, default is false +;logging = false + + +#################################### Analytics #################################### +[analytics] +# Server reporting, sends usage counters to stats.grafana.org every 24 hours. +# No ip addresses are being tracked, only simple counters to track +# running instances, dashboard and error counts. It is very helpful to us. +# Change this option to false to disable reporting. +;reporting_enabled = true + +# Set to false to disable all checks to https://grafana.net +# for new versions (grafana itself and plugins), check is used +# in some UI views to notify that grafana or plugin update exists +# This option does not cause any auto updates, nor send any information +# only a GET request to http://grafana.com to get latest versions +;check_for_updates = true + +# Google Analytics universal tracking code, only enabled if you specify an id here +;google_analytics_ua_id = + +#################################### Security #################################### +[security] +# default admin user, created on startup +;admin_user = admin + +# default admin password, can be changed before first start of grafana, or in profile settings +;admin_password = admin + +# used for signing +;secret_key = SW2YcwTIb9zpOOhoPsMm + +# Auto-login remember days +;login_remember_days = 7 +;cookie_username = grafana_user +;cookie_remember_name = grafana_remember + +# disable gravatar profile images +;disable_gravatar = false + +# data source proxy whitelist (ip_or_domain:port 
separated by spaces) +;data_source_proxy_whitelist = + +[snapshots] +# snapshot sharing options +;external_enabled = true +;external_snapshot_url = https://snapshots-origin.raintank.io +;external_snapshot_name = Publish to snapshot.raintank.io + +# remove expired snapshot +;snapshot_remove_expired = true + +# remove snapshots after 90 days +;snapshot_TTL_days = 90 + +#################################### Users #################################### +[users] +# disable user signup / registration +;allow_sign_up = true + +# Allow non admin users to create organizations +;allow_org_create = true + +# Set to true to automatically assign new users to the default organization (id 1) +;auto_assign_org = true + +# Default role new users will be automatically assigned (if disabled above is set to true) +;auto_assign_org_role = Viewer + +# Background text for the user field on the login page +;login_hint = email or username + +# Default UI theme ("dark" or "light") +;default_theme = dark + +[auth] +# Set to true to disable (hide) the login form, useful if you use OAuth, defaults to false +;disable_login_form = false + +# Set to true to disable the signout link in the side menu. useful if you use auth.proxy, defaults to false +;disable_signout_menu = false + +#################################### Anonymous Auth ########################## +[auth.anonymous] +# enable anonymous access +;enabled = false + +# specify organization name that should be used for unauthenticated users +;org_name = Main Org. 
+ +# specify role for unauthenticated users +;org_role = Viewer + +#################################### Github Auth ########################## +[auth.github] +;enabled = false +;allow_sign_up = true +;client_id = some_id +;client_secret = some_secret +;scopes = user:email,read:org +;auth_url = https://github.com/login/oauth/authorize +;token_url = https://github.com/login/oauth/access_token +;api_url = https://api.github.com/user +;team_ids = +;allowed_organizations = + +#################################### Google Auth ########################## +[auth.google] +;enabled = false +;allow_sign_up = true +;client_id = some_client_id +;client_secret = some_client_secret +;scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email +;auth_url = https://accounts.google.com/o/oauth2/auth +;token_url = https://accounts.google.com/o/oauth2/token +;api_url = https://www.googleapis.com/oauth2/v1/userinfo +;allowed_domains = + +#################################### Generic OAuth ########################## +[auth.generic_oauth] +;enabled = false +;name = OAuth +;allow_sign_up = true +;client_id = some_id +;client_secret = some_secret +;scopes = user:email,read:org +;auth_url = https://foo.bar/login/oauth/authorize +;token_url = https://foo.bar/login/oauth/access_token +;api_url = https://foo.bar/user +;team_ids = +;allowed_organizations = + +#################################### Grafana.com Auth #################### +[auth.grafananet] +;enabled = false +;allow_sign_up = true +;client_id = some_id +;client_secret = some_secret +;scopes = user:email +;allowed_organizations = + +#################################### Auth Proxy ########################## +[auth.proxy] +;enabled = false +;header_name = X-WEBAUTH-USER +;header_property = username +;auto_sign_up = true +;ldap_sync_ttl = 60 +;whitelist = 192.168.1.1, 192.168.2.1 + +#################################### Basic Auth ########################## +[auth.basic] +;enabled = true + 
+#################################### Auth LDAP ########################## +[auth.ldap] +;enabled = false +;config_file = /etc/grafana/ldap.toml +;allow_sign_up = true + +#################################### SMTP / Emailing ########################## +[smtp] +;enabled = false +;host = localhost:25 +;user = +# If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" +;password = +;cert_file = +;key_file = +;skip_verify = false +;from_address = admin@grafana.localhost +;from_name = Grafana + +[emails] +;welcome_email_on_sign_up = false + +#################################### Logging ########################## +[log] +# Either "console", "file", "syslog". Default is console and file +# Use space to separate multiple modes, e.g. "console file" +;mode = console file + +# Either "trace", "debug", "info", "warn", "error", "critical", default is "info" +;level = info + +# optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug +;filters = + + +# For "console" mode only +[log.console] +;level = + +# log line format, valid options are text, console and json +;format = console + +# For "file" mode only +[log.file] +;level = + +# log line format, valid options are text, console and json +;format = text + +# This enables automated log rotate(switch of following options), default is true +;log_rotate = true + +# Max line number of single file, default is 1000000 +;max_lines = 1000000 + +# Max size shift of single file, default is 28 means 1 << 28, 256MB +;max_size_shift = 28 + +# Segment log daily, default is true +;daily_rotate = true + +# Expired days of log file(delete after max days), default is 7 +;max_days = 7 + +[log.syslog] +;level = + +# log line format, valid options are text, console and json +;format = text + +# Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used. +;network = +;address = + +# Syslog facility. 
user, daemon and local0 through local7 are valid. +;facility = + +# Syslog tag. By default, the process' argv[0] is used. +;tag = + + +#################################### AMQP Event Publisher ########################## +[event_publisher] +;enabled = false +;rabbitmq_url = amqp://localhost/ +;exchange = grafana_events + +;#################################### Dashboard JSON files ########################## +[dashboards.json] +;enabled = false +;path = /var/lib/grafana/dashboards + +#################################### Alerting ############################ +[alerting] +# Disable alerting engine & UI features +;enabled = true +# Makes it possible to turn off alert rule execution but alerting UI is visible +;execute_alerts = true + +#################################### Internal Grafana Metrics ########################## +# Metrics available at HTTP API Url /api/metrics +[metrics] +# Disable / Enable internal metrics +;enabled = true + +# Publish interval +;interval_seconds = 10 + +# Send internal metrics to Graphite +[metrics.graphite] +# Enable by setting the address setting (ex localhost:2003) +;address = +;prefix = prod.grafana.%(instance_name)s. + +#################################### Grafana.com integration ########################## +# URL used to import dashboards directly from Grafana.com +[grafana_net] +;url = https://grafana.com + +#################################### External image storage ########################## +[external_image_storage] +# Used for uploading images to public servers so they can be included in slack/email messages. 
+# you can choose between (s3, webdav) +;provider = + +[external_image_storage.s3] +;bucket_url = +;access_key = +;secret_key = + +[external_image_storage.webdav] +;url = +;public_url = +;username = +;password = diff --git a/patches/0001-ansible-Disable-devel_mode.patch b/patches/0001-ansible-Disable-devel_mode.patch new file mode 100644 index 0000000..5ddfac0 --- /dev/null +++ b/patches/0001-ansible-Disable-devel_mode.patch @@ -0,0 +1,28 @@ +From 49ffd15645a8b377b600f44102cad613a71fdd2b Mon Sep 17 00:00:00 2001 +From: Boris Ranto +Date: Fri, 6 Oct 2017 12:22:37 +0200 +Subject: [PATCH] ansible: Disable devel_mode + +Signed-off-by: Boris Ranto +--- + ansible/roles/cephmetrics-common/tasks/merge_vars.yml | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/ansible/roles/cephmetrics-common/tasks/merge_vars.yml b/ansible/roles/cephmetrics-common/tasks/merge_vars.yml +index f8dbcd0..15d2a6b 100644 +--- a/ansible/roles/cephmetrics-common/tasks/merge_vars.yml ++++ b/ansible/roles/cephmetrics-common/tasks/merge_vars.yml +@@ -3,3 +3,10 @@ + set_fact: {"{{ item }}": "{% if vars[item] is not defined %}{{ defaults[item] }}{% elif vars[item] is mapping %}{{ defaults[item]|combine(vars[item]|default({})) }}{% else %}{{ vars[item] }}{% endif %}"} + with_items: "{{ defaults.keys() }}" + no_log: true ++ ++- name: Make sure devel_mode is not on ++ assert: ++ that: ++ - devel_mode == False ++ - use_epel == False ++ msg: "Devel mode is not supported in the downstream builds" +-- +2.9.5 + diff --git a/screenshots/archive/dashboard-2017-05-19.png b/screenshots/archive/dashboard-2017-05-19.png new file mode 100644 index 0000000..fde57c5 Binary files /dev/null and b/screenshots/archive/dashboard-2017-05-19.png differ diff --git a/screenshots/archive/dashboard-2017-05-24.png b/screenshots/archive/dashboard-2017-05-24.png new file mode 100644 index 0000000..3701904 Binary files /dev/null and b/screenshots/archive/dashboard-2017-05-24.png differ diff --git 
a/screenshots/archive/dashboard-2017-05-26.png b/screenshots/archive/dashboard-2017-05-26.png new file mode 100644 index 0000000..611f9d6 Binary files /dev/null and b/screenshots/archive/dashboard-2017-05-26.png differ diff --git a/screenshots/archive/dashboard-2017-05-29.png b/screenshots/archive/dashboard-2017-05-29.png new file mode 100644 index 0000000..6089f79 Binary files /dev/null and b/screenshots/archive/dashboard-2017-05-29.png differ diff --git a/screenshots/at-a-glance.png b/screenshots/at-a-glance.png new file mode 100644 index 0000000..466a879 Binary files /dev/null and b/screenshots/at-a-glance.png differ diff --git a/screenshots/ceph-backend.png b/screenshots/ceph-backend.png new file mode 100644 index 0000000..8da34f0 Binary files /dev/null and b/screenshots/ceph-backend.png differ diff --git a/screenshots/ceph-frontend.png b/screenshots/ceph-frontend.png new file mode 100644 index 0000000..cb3dee4 Binary files /dev/null and b/screenshots/ceph-frontend.png differ diff --git a/screenshots/ceph-rados.png b/screenshots/ceph-rados.png new file mode 100644 index 0000000..630d4ad Binary files /dev/null and b/screenshots/ceph-rados.png differ diff --git a/screenshots/ceph-rgw.png b/screenshots/ceph-rgw.png new file mode 100644 index 0000000..ad271c9 Binary files /dev/null and b/screenshots/ceph-rgw.png differ diff --git a/screenshots/disk-busy-by-server.png b/screenshots/disk-busy-by-server.png new file mode 100644 index 0000000..2b5d4a2 Binary files /dev/null and b/screenshots/disk-busy-by-server.png differ diff --git a/screenshots/disk-latency-by-server.png b/screenshots/disk-latency-by-server.png new file mode 100644 index 0000000..8bfa17a Binary files /dev/null and b/screenshots/disk-latency-by-server.png differ diff --git a/screenshots/iops-by-server.png b/screenshots/iops-by-server.png new file mode 100644 index 0000000..189ca46 Binary files /dev/null and b/screenshots/iops-by-server.png differ diff --git a/screenshots/network-load.png 
b/screenshots/network-load.png new file mode 100644 index 0000000..6f49966 Binary files /dev/null and b/screenshots/network-load.png differ diff --git a/screenshots/osd-node-details.png b/screenshots/osd-node-details.png new file mode 100644 index 0000000..21be086 Binary files /dev/null and b/screenshots/osd-node-details.png differ diff --git a/selinux/cephmetrics.te b/selinux/cephmetrics.te new file mode 100644 index 0000000..23ef409 --- /dev/null +++ b/selinux/cephmetrics.te @@ -0,0 +1,42 @@ +policy_module(cephmetrics, 1.1.0) + +require { + type bin_t; + type collectd_t; + type ceph_t; + type ceph_exec_t; + type ceph_var_run_t; + type ceph_var_lib_t; + type fixed_disk_device_t; + type tmp_t; + class unix_stream_socket connectto; + class dir read; + class file getattr; + class capability2 block_suspend; + class lnk_file read; + class unix_stream_socket connectto; + class dir { read write }; + class capability2 block_suspend; + class file { getattr execute execute_no_trans open read write }; +} + +#============= collectd_t ============== + +allow collectd_t bin_t:file { execute execute_no_trans }; +#!!!! 
This avc can be allowed using the boolean 'daemons_enable_cluster_mode' +allow collectd_t ceph_t:unix_stream_socket connectto; +allow collectd_t ceph_exec_t:file { execute execute_no_trans open read }; +allow collectd_t ceph_var_run_t:dir read; +allow collectd_t ceph_var_lib_t:dir read; +allow collectd_t ceph_var_lib_t:file { getattr open read }; +allow collectd_t ceph_var_lib_t:lnk_file { getattr open read }; +allow collectd_t fixed_disk_device_t:blk_file getattr; +allow collectd_t self:capability2 block_suspend; +allow collectd_t var_log_t:dir { add_name write }; +allow collectd_t var_log_t:file create; +corecmd_exec_shell(collectd_t) +files_list_tmp(collectd_t) +libs_exec_ldconfig(collectd_t) +libs_manage_lib_dirs(collectd_t) +libs_manage_lib_files(collectd_t) +logging_write_generic_logs(collectd_t) diff --git a/setup/add_datasource.json b/setup/add_datasource.json new file mode 100644 index 0000000..64d9ef8 --- /dev/null +++ b/setup/add_datasource.json @@ -0,0 +1,8 @@ +{ + "name":"Local", + "type":"graphite", + "url":"http://192.168.1.52", + "access":"proxy", + "basicAuth":false, + "isDefault":true +} diff --git a/tests/testosd.py b/tests/testosd.py new file mode 100644 index 0000000..53dc139 --- /dev/null +++ b/tests/testosd.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +from collectors.osd import OSDs +from collectors.common import flatten_dict + +import time + +def main(): + o = OSDs('ceph') + ctr = 0 + while ctr < 30: + + s = o.get_stats() + print(s) + print(flatten_dict(s)) + + time.sleep(1) + ctr += 1 + +if __name__ == "__main__": + main() diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..0c8e78e --- /dev/null +++ b/tox.ini @@ -0,0 +1,39 @@ +[tox] +skipsdist = True +envlist=ansible-lint,ansible-syntax,flake8 + +[testenv:ansible-lint] +install_command = pip install --upgrade {opts} {packages} +deps= + ansible-lint +commands=ansible-lint -x ANSIBLE0010,ANSIBLE0012,ANSIBLE0017 ansible/playbook.yml + +[testenv:ansible-syntax] +install_command 
= pip install --upgrade {opts} {packages} +deps= + ansible==2.2.3.0 +changedir=ansible +commands= + ansible-playbook -i '127.0.0.1,' playbook.yml --syntax-check -vv + +[testenv:flake8] +install_command = pip install --upgrade {opts} {packages} +deps= + flake8 +commands=flake8 --select=F,E9 {posargs:*.py collectors tests} + +# Integration tests must operate against a live deployment. To run, simply: +# tox -e integration /path/to/inventory +# NOTE: A current limitation of these tests is that they assume that defaults +# were used for things like ports, usernames, etc. They do, however, +# support devel_mode=True/False. +[testenv:integration] +install_command = pip install --upgrade {opts} {packages} +deps= + ansible + pytest + pytest-xdist + testinfra +changedir=ansible +commands= + py.test -n auto --connection=ansible --ansible-inventory {posargs} ./roles/