From: Paul Cuzner Date: Wed, 2 Aug 2017 02:03:31 +0000 (+1200) Subject: removed dashboards X-Git-Tag: v1.0~36^2~9 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2cbef6f942fd1f822112aa123f1e901dad20238e;p=cephmetrics.git removed dashboards --- 2cbef6f942fd1f822112aa123f1e901dad20238e diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a7079d9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,110 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# vim swap files +*.swp + +# Python virtualenv +virtualenv/ + +# ansible retry files +*.retry diff --git a/INSTALL.md b/INSTALL.md new file mode 100644 index 0000000..4ca0c09 --- /dev/null +++ b/INSTALL.md @@ -0,0 +1,125 @@ +# Installation Process + +## Objective: +Run a Grafana instance to 
provide a monitoring dashboard to a ceph +cluster. + +## Pre-requisites +### Monitoring host +- docker and docker-compose (for simplicity) +- grafana image (official latest 4.3 release from docker hub) +- graphite image (docker.io/abezhenar/graphite-centos7) +- clone the cephmetrics repo (docker configuration, dashboards) +- host that will run the monitor should have passwordless ssh to all the ceph +nodes +- the storage for the graphite database should be on SSD/flash if possible +- needs PyYAML, tested with python 2.7.13 + +### Ceph Cluster Nodes +- collectd rpm (5.7 or above) + +## Installation Sequence +Install the monitoring endpoint first, and then apply the collectd configuration +to each of the ceph nodes. + + +## Setting Up the monitoring endpoint +On the monitoring host, perform the following steps; +1. Pull the required docker images (*listed above*) +2. we need to persist the grafana configuration db and settings, as well as the +graphite data. +```markdown +mkdir -p /opt/docker/grafana/etc +mkdir -p /opt/docker/grafana/data/plugins +mkdir -p /opt/docker/graphite +``` +3. Download the additional status panel plugin +```markdown +cd /opt/docker/grafana/data/plugins +wget https://grafana.com/api/plugins/vonage-status-panel/versions/1.0.4/download +unzip download +rm -f download +``` +4. Copy the seed .ini file for grafana to the containers etc directory, and reset +the permissions to be compatible with the containers +```markdown +cp etc/grafana/grafana.ini /opt/docker/grafana/etc +chown -R 104:107 /opt/docker/grafana +chown -R 997 /opt/docker/graphite +chmod g+w /opt/docker/graphite + +``` +5. Edit the docker-compose.yml example (if necessary) +6. From the directory with the compose file, issue +``` +docker-compose up -d +``` +7. 
check that the containers are running and the endpoints are listening +7.1 Use ```docker ps``` +7.2 use ```netstat``` and look for the following ports: 3000,80,2003,2004,7002 +7.3 open a browser and connect to graphite - it should be running on port 80 of +the local machine +8. Add the graphite instance as a datasource to grafana +8.1 update setup/add_datasource.json with the IP of the host machine +8.2 register the graphite instance to grafana as the default data source +```markdown +curl -u admin:admin -H "Content-Type: application/json" -X POST http://localhost:3000/api/datasources \ +--data-binary @setup/add_datasource.json +``` +9. Install the grafana labs pie-chart plugin +9.1 open a shell session to the grafana instance, and install the plugin +```markdown +docker exec -it grafana bash +grafana-cli plugins install grafana-piechart-panel +``` +10. the sample dashboards need to be added/edited to reflect the ceph cluster to +monitor +10.1 seed dashboards are provided in the dashboards/current directory +10.2 edit ```dashboard.yml``` with the shortnames of the OSD's and RGW's, plus +the dns domain name of the environment. +10.3 run the following command +```markdown +python dashUpdater.py +``` + + +### Updating the dashboards +After adding ceph nodes to the configuration, update the ```dashboard.yml``` +file, and then rerun the ```dashUpdater.py``` script. + + +## Configuration on Each Ceph Node +You may need to update your SELINUX policy to allow the write_graphite plugin +to access outbound on port 2003. To test, simply disable SELINUX +1. install collectd (this will also require libcollectdclient) +2. create the required directories for the cephmetrics collectors (see known +issues [2]) +```markdown +mkdir -p /usr/lib64/collectd/python-plugins/collectors +``` +3. copy the collectors to the directory created in [2], and cephmetrics.py +to /usr/lib64/collectd/python-plugins +4. 
Setup the collectd plugins +4.1 Update the write_graphite.conf file to specify the hostname where the +grafana/graphite environment is (use a hostname not IP - anecdotally I found that +with an IP the plugin fails to connect to the graphite container port?) +4.2 copy the example plugin files to the /etc/collectd.d directory (i.e. cpu.conf, +memory.conf etc) +5. update the "ClusterName" parameter in the cephmetrics plugin file to match + the name of your ceph cluster (default is 'ceph') +6. copy the example collectd.conf file to the ceph node (or update the existing +configuration to ensure there is a ```Include "/etc/collectd.d/*.conf"``` entry) +7. enable collectd +8. start collectd +9. check collectd is running without errors + +## Known Issues +1. Following a reboot of an OSD node, the cephmetrics collectd plugin doesn't send disk +stats. ***Workaround**: Following the reboot of an OSD, restart the collectd service.* +2. the cephmetrics.py and collectors should be installed through python-setuptools to cut down on +the installation steps. +3. SELINUX may block the write_graphite plugin writing outbound on port 2003 + + + + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..65c5ca8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. 
+ + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/README b/README new file mode 100644 index 0000000..b6fd253 --- /dev/null +++ b/README @@ -0,0 +1,58 @@ +On the ceph node install collectd +- tested with collectd-5.7.0-4.el7ost.x86_64 + +Interval is set to 10 in collectd.conf + +write_graphite plugin configured as follows; + +LoadPlugin "write_graphite" + + + Host "192.168.1.52" + Port "2003" + Protocol "tcp" + LogSendErrors true + Prefix "collectd." + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + PreserveSeparator true + SeparateInstances true + + + +5.7 introduces the PreserveSeparator parm, allowing the type instance name to +contain '.'. At the moment I used .'s in the metric name since the same plugin +provides all metrics. + +Comments welcome! + +This is what an entry looks like as seen in influx +collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._rgw_root.num_bytes_recovered + | | \ \ \ \ + | | \ \ \ \ + | | | \ \ \ +prefix | hostname | plugin name | type |ceph|metric| metric name + name group + +In the case of pools, the metric name is prefixed by the pool name + +Added cephmetrics.conf to /etc/collectd.d dir + +mkdir -p /usr/lib64/collectd/python-plugins + +to the plugin dir, I copied + - cephmetrics.py + - collectors dir + + +Known Issues +1. Singlestat panels don't track the graph panel values 100% of the time + + + + +Container Configuration - pulled from docker.io registry +- grafana - grafana/grafana (official image) +- graphite - nickstenning/graphite - https://hub.docker.com/r/nickstenning/graphite/ + diff --git a/README.md b/README.md new file mode 100644 index 0000000..efe3413 --- /dev/null +++ b/README.md @@ -0,0 +1,82 @@ +# cephmetrics + +Cephmetrics is a tool that allows a user to visually monitor various metrics in a running Ceph cluster. + +## Prerequisites +- RHEL 7 should be running on all hosts +- A functional ceph cluster running version ceph-osd-10.2.7-27.el7cp.x86_64 or later is already up and running. 
+- Another host machine independent of the ceph machines must be available. This host will be used to receive data pushed by the hosts in the Ceph cluster, and will run the dashboard to display that data. +- A host machine on which to execute `ansible-playbook` to orchestrate the deployment must be available. +- Passwordless SSH access from the deploy host to the ceph hosts. The username should be the same for all hosts. +- Passwordless sudo access on the ceph and dashboard hosts +- All hosts must share the same DNS domain + +## Resulting configuration + +After running this procedure, you will have the following configuration. +- The ceph nodes will have `collectd` installed, along with collector plugins from `cephmetrics-collectd` +- The dashboard host will have `grafana` installed and configured to display various dashboards by querying data received from Ceph nodes via a `graphite-web`, `python-carbon`, and `python-whisper` stack. + +## Installation + +### Install cephmetrics-ansible + +First, decide which machine you want to use to run `ansible-playbook`. If you used [`ceph-ansible`](https://github.com/ceph/ceph-ansible) to set up your cluster, you may want to reuse that same host to take advantage of the inventory file that was created as part of that process. + +Once the host is selected, perform the following steps there. This will install a repo which includes the cephmetrics installation code and ansible (version 2.2.3 or later): +``` +sudo su - +mkdir ~/cephmetrics +subscription-manager repos --enable rhel-7-server-optional-rpms --enable rhel-7-server-rhscon-2-installer-rpms +curl -L -o /etc/yum.repos.d/cephmetrics.repo http://download.ceph.com/cephmetrics/rpm-master/el7/cephmetrics.repo +yum install cephmetrics-ansible +``` + +### Create or edit the inventory file + +Next, we need an inventory file. 
If you are running `ansible-playbook` on a host that previously ran `ceph-ansible`, you may simply modify `/etc/ansible/hosts`; otherwise you may copy `/usr/share/cephmetrics-ansible/inventory.sample` and modify it if you wish. + +The inventory file format looks like: + + [ceph-grafana] + grafana_host.example.com + + [osds] + osd0.example.com + osd1.example.com + osd2.example.com + + [mons] + mon0.example.com + mon1.example.com + mon2.example.com + + [mdss] + mds0.example.com + + [rgws] + rgw0.example.com + +If you are running `ansible-playbook` on a host mentioned in the inventory file, you will need to append `ansible_connection=local` to each line in the inventory file that mentions that host. An example: + ``` + my_host.example.com ansible_connection=local + ``` +Omit the mdss section if no ceph mds nodes are installed. Omit the rgws section if no rgw nodes are installed. + +Ansible variables can be set in a `vars.yml` file if necessary. If it is required, make sure to add `-e '@/path/to/vars.yml'` to your `ansible-playbook` invocation below. [Click here](./ansible/README.md) for more information. + +## Deploy via ansible-playbook + +If you are using a `ceph-ansible` host, run these commands: +``` +cd /usr/share/cephmetrics-ansible +ansible-playbook -v playbook.yml +``` + +Otherwise, run these commands: +``` +cd /usr/share/cephmetrics-ansible +ansible-playbook -v -i /path/to/inventory playbook.yml +``` + +Note: The reason it is necessary to change directories is so that `ansible-playbook` will use the bundled `ansible.cfg`; there is currently no command-line argument allowing the specification of an arbitrary `.cfg` file. 
diff --git a/TODO b/TODO new file mode 100644 index 0000000..cc52402 --- /dev/null +++ b/TODO @@ -0,0 +1,25 @@ + +collectd +- add network and cpu to all deployments +- define standard easy roll-out conf (use collectd.d for write_graphite, cephmetrics, cpu and network) + +Dashboard + + +collectd : cephmetrics +- add metrics starting message so you know collection is active + +Python Modules +Mon +- + +RGW +- + + + +Completed Items +05/24 RGW: implement the latencies as different metrics to allow summarisation at the db layer +05/24 MON: add ceph health status (OK/WARN) to output dict +05/24 Dashboard: Add ceph health text +05/29 OSD: OSD metrics added, with dashboard updates \ No newline at end of file diff --git a/ansible/README.md b/ansible/README.md new file mode 100644 index 0000000..40a7930 --- /dev/null +++ b/ansible/README.md @@ -0,0 +1,81 @@ +# Deploying cephmetrics with ansible + +This set of ansible roles, in combination with `playbook.yml`, provides a way to deploy cephmetrics to monitor a Ceph cluster. + +## Prerequisites +- RHEL 7 is supported with `devel_mode` set to `True` or `False`. Ubuntu 16.04 is supported only when `devel_mode` is `True` at this point. +- Currently only RHEL 7 is supported for all hosts +- A functional [ceph](https://ceph.com/) cluster. [collectd](https://collectd.org/) will be used to collect metrics +- A separate host to receive data pushed by hosts in the Ceph cluster, and run the dashboard to display that data. +- An inventory file describing your cluster. +- A host on which to execute `ansible-playbook` to orchestrate the deployment. This can be the same as the dashboard host. +- Passwordless SSH access from the deploy host to the ceph hosts. The username should be the same for all hosts. 
+- Passwordless sudo access on the ceph and dashboard hosts +- All hosts must share the same DNS domain + +## Example inventory file + + [ceph-grafana] + cephmetrics.example.com + + [osds] + osd0.example.com + osd1.example.com + osd3.example.com + + [mons] + mon0.example.com + mon1.example.com + mon2.example.com + + [mdss] + mds0.example.com + + [rgws] + rgw0.example.com + +Notes: +- Omit any sections from the inventory file for which your cluster has no hosts. +- If you are running `ansible-playbook` directly on the dashboard (`ceph-grafana`) host, its inventory entry should look like: + ``` + [ceph-grafana] + cephmetrics.example.com ansible_connection=local + ``` + +## Roles +- [ceph-collectd](./roles/ceph-collectd/): Used for ceph cluster hosts +- [ceph-grafana](./roles/ceph-grafana/): Used for the dashboard host + +## Variables +You may override certain variables by creating a `vars.yml` file: +- `ansible_ssh_user`: The user account used for SSH connections. This may also be set on a per-host basis in the inventory file. +- `cluster`: The name of the Ceph cluster. Default: ceph +- `firewalld_zone`: The `firewalld` zone to use when opening ports for Grafana and Carbon. Default: public +- `devel_mode`: Whether to perform a development-mode deployment vs. a production deployment. Default: true +- `whisper`: May be used to configure [whisper retention](http://graphite.readthedocs.io/en/latest/config-carbon.html#storage-schemas-conf) settings. Default: + ``` + whisper: + retention: + - ['10s', '7d'] + - ['1m', '30d'] + - ['15m', '5y'] + ``` + +These variables are only relevant when `devel_mode` is true: +- `use_epel`: Whether or not to use EPEL and grafana.com instead of ceph.com-sourced packages for dependencies. Default: false + +## Current Limitations + +- Currently, metrics are only *displayed* for `osd` and `rgw` hosts. +- Authentication for grafana and graphite is fixed and creates a user `admin` with password `admin`. 
+- Services are deployed on the dashboard host directly; there is not yet support for a containerized deployment. + +## Usage +If you are not overriding any variables: +``` + ansible-playbook -v -i ./inventory +``` +Or, if you are: +``` + ansible-playbook -v -i ./inventory -e '@vars.yml' +``` diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg new file mode 100644 index 0000000..9c3ef13 --- /dev/null +++ b/ansible/ansible.cfg @@ -0,0 +1,4 @@ +[defaults] +roles_path: ./roles/ +host_key_checking: False +forks: 50 diff --git a/ansible/common/files/cephmetrics-devel.repo b/ansible/common/files/cephmetrics-devel.repo new file mode 100644 index 0000000..48d8733 --- /dev/null +++ b/ansible/common/files/cephmetrics-devel.repo @@ -0,0 +1,20 @@ +[cephmetrics] +name=cephmetrics packages for \$basearch +baseurl=https://chacra.ceph.com/r/cephmetrics/master/HEAD/rhel/7/flavors/default/\$basearch +enabled=1 +gpgcheck=0 +type=rpm-md + +[cephmetrics-noarch] +name=cephmetrics noarch packages +baseurl=https://chacra.ceph.com/r/cephmetrics/master/HEAD/rhel/7/flavors/default/noarch +enabled=1 +gpgcheck=0 +type=rpm-md + +[cephmetrics-source] +name=cephmetrics source packages +baseurl=https://chacra.ceph.com/r/cephmetrics/master/HEAD/rhel/7/flavors/default/SRPMS +enabled=1 +gpgcheck=0 +type=rpm-md diff --git a/ansible/common/files/cephmetrics-prod.repo b/ansible/common/files/cephmetrics-prod.repo new file mode 100644 index 0000000..990a5bc --- /dev/null +++ b/ansible/common/files/cephmetrics-prod.repo @@ -0,0 +1,20 @@ +[cephmetrics] +name=cephmetrics packages for \$basearch +baseurl=http://download.ceph.com/cephmetrics/rpm-master/el7/\$basearch +enabled=1 +gpgcheck=0 +type=rpm-md + +[cephmetrics-noarch] +name=cephmetrics noarch packages +baseurl=http://download.ceph.com/cephmetrics/rpm-master/el7/noarch +enabled=1 +gpgcheck=0 +type=rpm-md + +[cephmetrics-source] +name=cephmetrics source packages +baseurl=http://download.ceph.com/cephmetrics/rpm-master/el7/SRPMS +enabled=0 +gpgcheck=0 
+type=rpm-md diff --git a/ansible/inventory.sample b/ansible/inventory.sample new file mode 100644 index 0000000..d3e8301 --- /dev/null +++ b/ansible/inventory.sample @@ -0,0 +1,18 @@ +[ceph-grafana] +grafana_host.example.com + +[osds] +osd0.example.com +osd1.example.com +osd2.example.com + +[mons] +mon0.example.com +mon1.example.com +mon2.example.com + +[mdss] +mds0.example.com + +[rgws] +rgw0.example.com diff --git a/ansible/playbook.yml b/ansible/playbook.yml new file mode 100644 index 0000000..05bed50 --- /dev/null +++ b/ansible/playbook.yml @@ -0,0 +1,32 @@ +--- +- hosts: + - ceph-grafana + become: true + roles: + - ceph-grafana + +- hosts: + # These are roles used by ceph-ansible + - mons + - agents + - osds + - mdss + - rgws + - nfss + - restapis + - rbdmirrors + - clients + - mgrs + # This role is (so far) only used for testing + - cluster + become: true + roles: + - ceph-collectd + +- hosts: localhost + connection: local + gather_facts: false + tasks: + - name: Print dashboard URL + debug: + msg: "All done! You may access your dashboard at http://{{ groups['ceph-grafana'][0] }}:3000/ with user 'admin' and password 'admin'." 
diff --git a/ansible/purge.yml b/ansible/purge.yml new file mode 100644 index 0000000..f640505 --- /dev/null +++ b/ansible/purge.yml @@ -0,0 +1,90 @@ +--- +- name: purge grafana host + hosts: + - ceph-grafana + become: true + tasks: + - name: Stop and disable services + service: + name: "{{ item }}" + enabled: no + state: stopped + with_items: + - grafana-server + - carbon-cache + - httpd + failed_when: false + + - name: Remove packages + package: + name: "{{ item }}" + state: absent + with_items: + - graphite-web + - python-carbon + - grafana + - cephmetrics + + - name: Remove files + file: + dest: "{{ item }}" + state: absent + with_items: + - /var/lib/graphite + - /var/lig/graphite-web + - /var/lib/grafana + - /var/lib/carbon + - /etc/grafana/grafana.ini + - /etc/carbon/storage-schemas.conf + - /etc/httpd/conf.d/graphite-web.conf + - /etc/yum.repos.d/cephmetrics.repo + - /etc/yum.repos.d/grafana.repo + - /tmp/dashboard.yml + - /tmp/dashUpdater.py + - /tmp/dashboards + +- name: purge collectd hosts + hosts: + # These are roles used by ceph-ansible + - mons + - agents + - osds + - mdss + - rgws + - nfss + - restapis + - rbdmirrors + - clients + - mgrs + # This role is (so far) only used for testing + - cluster + become: true + tasks: + - name: Stop and disable collectd + service: + name: collectd + enabled: no + state: stopped + failed_when: false + + - name: Remove packages + package: + name: "{{ item }}" + state: absent + with_items: + - cephmetrics-collectors + - collectd + + - name: Remove files + file: + dest: "{{ item }}" + state: absent + with_items: + - /etc/collectd.d/cephmetrics.conf + - /etc/collectd.d/cpu.conf + - /etc/collectd.d/memory.conf + - /etc/collectd.d/nics.conf + - /etc/collectd.d/write_graphite.conf + - /etc/collectd.conf + - /etc/yum.repos.d/cephmetrics.repo + - /usr/lib64/collectd diff --git a/ansible/roles/ceph-collectd/defaults/main.yml b/ansible/roles/ceph-collectd/defaults/main.yml new file mode 100644 index 0000000..b3652a8 --- 
/dev/null +++ b/ansible/roles/ceph-collectd/defaults/main.yml @@ -0,0 +1,25 @@ +--- +containerized_deployment: false +cluster: ceph +use_epel: false +devel_mode: true +collector_dependencies: + yum: + # For the json python module + - python-libs + # For the rados python module + - python-rados + # For the ceph_daemon python module + - ceph-common + # For semodule + - make + - libsemanage-python + - policycoreutils-python + - selinux-policy-devel + apt: + # For the json module, via libpython2.7-stdlib + - python2.7 + # For the rados python module + - python-rados + # For the ceph_daemon python module + - ceph-common diff --git a/ansible/roles/ceph-collectd/files/cephmetrics-devel.repo b/ansible/roles/ceph-collectd/files/cephmetrics-devel.repo new file mode 120000 index 0000000..0b366fa --- /dev/null +++ b/ansible/roles/ceph-collectd/files/cephmetrics-devel.repo @@ -0,0 +1 @@ +../../../common/files/cephmetrics-devel.repo \ No newline at end of file diff --git a/ansible/roles/ceph-collectd/files/cephmetrics-prod.repo b/ansible/roles/ceph-collectd/files/cephmetrics-prod.repo new file mode 120000 index 0000000..9014d73 --- /dev/null +++ b/ansible/roles/ceph-collectd/files/cephmetrics-prod.repo @@ -0,0 +1 @@ +../../../common/files/cephmetrics-prod.repo \ No newline at end of file diff --git a/ansible/roles/ceph-collectd/files/cephmetrics.py b/ansible/roles/ceph-collectd/files/cephmetrics.py new file mode 120000 index 0000000..8de2567 --- /dev/null +++ b/ansible/roles/ceph-collectd/files/cephmetrics.py @@ -0,0 +1 @@ +../../../../cephmetrics.py \ No newline at end of file diff --git a/ansible/roles/ceph-collectd/files/cephmetrics.te b/ansible/roles/ceph-collectd/files/cephmetrics.te new file mode 120000 index 0000000..4d66f82 --- /dev/null +++ b/ansible/roles/ceph-collectd/files/cephmetrics.te @@ -0,0 +1 @@ +../../../../selinux/cephmetrics.te \ No newline at end of file diff --git a/ansible/roles/ceph-collectd/files/collectors b/ansible/roles/ceph-collectd/files/collectors 
new file mode 120000 index 0000000..5efd446 --- /dev/null +++ b/ansible/roles/ceph-collectd/files/collectors @@ -0,0 +1 @@ +../../../../collectors \ No newline at end of file diff --git a/ansible/roles/ceph-collectd/files/etc b/ansible/roles/ceph-collectd/files/etc new file mode 120000 index 0000000..5221ea5 --- /dev/null +++ b/ansible/roles/ceph-collectd/files/etc @@ -0,0 +1 @@ +../../../../etc \ No newline at end of file diff --git a/ansible/roles/ceph-collectd/handlers/main.yml b/ansible/roles/ceph-collectd/handlers/main.yml new file mode 100644 index 0000000..bd989ee --- /dev/null +++ b/ansible/roles/ceph-collectd/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: Restart collectd + service: + name: collectd + enabled: yes + state: restarted diff --git a/ansible/roles/ceph-collectd/tasks/configure_collectd.yml b/ansible/roles/ceph-collectd/tasks/configure_collectd.yml new file mode 100644 index 0000000..d4df132 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/configure_collectd.yml @@ -0,0 +1,55 @@ +--- +- name: Set collectd_conf + set_fact: + collectd_conf: "/etc/{{ 'collectd/' if ansible_pkg_mgr == 'apt' else '' }}collectd.conf" + +- name: Set collectd_conf_d + set_fact: + collectd_conf_d: "/etc/collectd{{ '/collectd.conf' if ansible_pkg_mgr == 'apt' else '' }}.d" + +- name: Ship collectd.conf + copy: + src: files/etc/collectd.conf + dest: "{{ collectd_conf }}" + notify: Restart collectd + +- name: Set PluginDir in collectd.conf + replace: + dest: "{{ collectd_conf }}" + regexp: 'PluginDir ".*"' + replace: 'PluginDir "{{ collectd_dir }}"' + notify: Restart collectd + +- name: Set Include path in collectd.conf + replace: + dest: "{{ collectd_conf }}" + regexp: 'Include ".*"' + replace: 'Include "{{ collectd_conf_d }}/*.conf"' + notify: Restart collectd + +- name: Ship /etc/collectd.d or /etc/collectd/collectd.conf.d + copy: + src: files/etc/collectd.d/ + dest: "{{ collectd_conf_d }}" + notify: Restart collectd + +- name: Set hostname in write_graphite.conf + 
replace: + dest: "{{ collectd_conf_d }}/write_graphite.conf" + regexp: 'Host ".*"' + replace: "Host \"{{ groups['ceph-grafana'][0] }}\"" + notify: Restart collectd + +- name: Set cluster name in cephmetrics.conf + replace: + dest: "{{ collectd_conf_d }}/cephmetrics.conf" + regexp: 'ClusterName ".*"' + replace: 'ClusterName "{{ cluster }}"' + notify: Restart collectd + +- name: Set ModulePath in cephmetrics.conf + replace: + dest: "{{ collectd_conf_d }}/cephmetrics.conf" + regexp: 'ModulePath ".*"' + replace: 'ModulePath "{{ collectd_cephmetrics_dir }}"' + notify: Restart collectd diff --git a/ansible/roles/ceph-collectd/tasks/install_collectd_plugins.yml b/ansible/roles/ceph-collectd/tasks/install_collectd_plugins.yml new file mode 100644 index 0000000..5af7367 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/install_collectd_plugins.yml @@ -0,0 +1,25 @@ +--- +- name: Set collectd_dir + set_fact: + collectd_dir: "/usr/lib{{ '64' if ansible_pkg_mgr == 'yum' else '' }}/collectd" + +- name: Set collectd_cephmetrics_dir + set_fact: + collectd_cephmetrics_dir: "{{ collectd_dir }}/cephmetrics" + +- name: Create collectors directory + file: + name: "{{ collectd_cephmetrics_dir }}/collectors" + state: directory + +- name: Ship collector plugins + copy: + src: files/collectors/ + dest: "{{ collectd_cephmetrics_dir }}/collectors" + notify: Restart collectd + +- name: Ship cephmetrics.py + copy: + src: files/cephmetrics.py + dest: "{{ collectd_cephmetrics_dir }}" + notify: Restart collectd diff --git a/ansible/roles/ceph-collectd/tasks/install_packages.yml b/ansible/roles/ceph-collectd/tasks/install_packages.yml new file mode 100644 index 0000000..b290ac3 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/install_packages.yml @@ -0,0 +1,36 @@ +--- +- name: Install collectd + package: + name: collectd + state: latest + when: + - devel_mode + notify: Restart collectd + +- name: Install collectd-python + package: + name: collectd-python + state: latest + when: + - 
ansible_pkg_mgr == "yum" + - devel_mode + - not use_epel + notify: Restart collectd + +- name: Install cephmetrics-collectors + package: + name: cephmetrics-collectors + state: latest + when: + - ansible_pkg_mgr == "yum" + - not devel_mode + notify: Restart collectd + +- name: Install dependencies for collector plugins + package: + name: "{{ item }}" + state: latest + with_items: "{{ collector_dependencies[ansible_pkg_mgr] }}" + when: + - devel_mode + notify: Restart collectd diff --git a/ansible/roles/ceph-collectd/tasks/main.yml b/ansible/roles/ceph-collectd/tasks/main.yml new file mode 100644 index 0000000..4ccf3df --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/main.yml @@ -0,0 +1,38 @@ +--- +- include: setup_repos.yml + when: + - not containerized_deployment + tags: + - packages + - repos + +- include: install_packages.yml + when: not containerized_deployment + tags: + - packages + +- include: install_collectd_plugins.yml + when: + - not containerized_deployment + - devel_mode + tags: + - collectors + +- include: configure_collectd.yml + when: not containerized_deployment + tags: + - collectors + +- include: selinux.yml + when: + - not containerized_deployment + - ansible_pkg_mgr == "yum" + - ansible_selinux.status is defined + - ansible_selinux.status == 'enabled' + tags: + - selinux + +- include: start_collectd.yml + when: not containerized_deployment + tags: + - services diff --git a/ansible/roles/ceph-collectd/tasks/selinux.yml b/ansible/roles/ceph-collectd/tasks/selinux.yml new file mode 100644 index 0000000..80885e3 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/selinux.yml @@ -0,0 +1,16 @@ +--- +- name: Enable collectd_tcp_network_connect SELinux boolean + seboolean: + name: collectd_tcp_network_connect + state: yes + persistent: yes + +- name: Restore SELinux context of OSD journals + shell: "restorecon -R -v /var/lib/ceph/osd/*/journal" + when: "'osds' in group_names" + register: restorecon + changed_when: restorecon.stdout|length != 0 or 
restorecon.stderr|length != 0 + +- include: selinux_module.yml + when: + - devel_mode diff --git a/ansible/roles/ceph-collectd/tasks/selinux_module.yml b/ansible/roles/ceph-collectd/tasks/selinux_module.yml new file mode 100644 index 0000000..715250c --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/selinux_module.yml @@ -0,0 +1,28 @@ +--- +- name: Remove SELinux policy package + command: semodule -r cephmetrics + failed_when: false + +- name: Remove any SELinux-related files + file: + path: "{{ item }}" + state: absent + with_items: + - /tmp/cephmetrics.fc + - /tmp/cephmetrics.if + - /tmp/cephmetrics.pp + - /tmp/cephmetrics.te + +- name: Copy SELinux type enforcement file + copy: + src: cephmetrics.te + dest: /tmp/cephmetrics.te + +- name: Build SELinux policy package + command: make -f /usr/share/selinux/devel/Makefile cephmetrics.pp + args: + chdir: /tmp + +- name: Load SELinux policy package + command: semodule -i /tmp/cephmetrics.pp + notify: Restart collectd diff --git a/ansible/roles/ceph-collectd/tasks/setup_repos.yml b/ansible/roles/ceph-collectd/tasks/setup_repos.yml new file mode 100644 index 0000000..226d391 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/setup_repos.yml @@ -0,0 +1,57 @@ +--- +- name: Enable EPEL + yum: + name: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm" + state: "{{ 'present' if use_epel else 'absent' }}" + when: + - ansible_pkg_mgr == "yum" + - devel_mode + +- name: Add collectd repo + template: + src: collectd.list + dest: /etc/apt/sources.list.d/collectd.list + when: + - ansible_pkg_mgr == 'apt' + - devel_mode + +- name: Add collectd.org GPG key to apt + apt_key: + id: 3994D24FB8543576 + state: present + keyserver: ha.pool.sks-keyservers.net + when: + - ansible_pkg_mgr == 'apt' + - devel_mode + +- name: Update apt cache + apt: + update_cache: true + when: + - ansible_pkg_mgr == 'apt' + - devel_mode + +- name: Install cephmetrics development repo + copy: + 
src: cephmetrics-devel.repo + dest: /etc/yum.repos.d/cephmetrics.repo + when: + - ansible_pkg_mgr == "yum" + - devel_mode + +- name: Remove cephmetrics development repo + file: + path: /etc/yum.repos.d/cephmetrics.repo + state: absent + when: + - ansible_pkg_mgr == "yum" + - use_epel + - devel_mode + +- name: Install cephmetrics production repo + copy: + src: files/cephmetrics-prod.repo + dest: /etc/yum.repos.d/cephmetrics.repo + when: + - ansible_pkg_mgr == "yum" + - not devel_mode diff --git a/ansible/roles/ceph-collectd/tasks/start_collectd.yml b/ansible/roles/ceph-collectd/tasks/start_collectd.yml new file mode 100644 index 0000000..ec4d7c4 --- /dev/null +++ b/ansible/roles/ceph-collectd/tasks/start_collectd.yml @@ -0,0 +1,6 @@ +--- +- name: Start collectd + service: + name: collectd + enabled: yes + state: started diff --git a/ansible/roles/ceph-collectd/templates/collectd.list b/ansible/roles/ceph-collectd/templates/collectd.list new file mode 100644 index 0000000..5a12f86 --- /dev/null +++ b/ansible/roles/ceph-collectd/templates/collectd.list @@ -0,0 +1 @@ +deb http://pkg.ci.collectd.org/deb {{ ansible_distribution_release }} collectd-5.7 diff --git a/ansible/roles/ceph-grafana/defaults/main.yml b/ansible/roles/ceph-grafana/defaults/main.yml new file mode 100644 index 0000000..855f007 --- /dev/null +++ b/ansible/roles/ceph-grafana/defaults/main.yml @@ -0,0 +1,57 @@ +--- +defaults: + containerized_deployment: false + use_epel: false + devel_mode: true + replace_dashboards: true + grafana: + datasource: Local + # Note: changing this value won't update the password in Grafana itself; + # change it via the web UI and then override this value to reflect. 
+ admin_password: admin + graphite: + service: "{{ 'graphite-web' if ansible_pkg_mgr == 'yum' else 'graphite-api' }}" + web_port: "{{ graphite_port | default('8080') }}" + api_port: 8888 + user: admin + password: admin + # The unix account running the graphite-web process + unix_user: + yum: apache + apt: _graphite + carbon: + unix_user: + yum: carbon + apt: _graphite + storage_dir: + yum: /var/lib/carbon + apt: /var/lib/graphite + whisper: + retention: + - ['10s', '7d'] + - ['1m', '30d'] + - ['15m', '5y'] + # The firewalld zone that carbon and grafana will use + firewalld_zone: public + # RHEL repos that need to be enabled with subscription-manager + rhsm_repos: + - rhel-7-server-rhscon-2-installer-rpms + - rhel-7-server-optional-rpms + devel_packages: + yum: + # unzip is needed to extract the Vonage plugin + - unzip + - graphite-web + - python-carbon + - grafana + apt: + # unzip is needed to extract the Vonage plugin + - unzip + - graphite-api + # For graphite-api + - gunicorn3 + - graphite-carbon + - grafana + # for dashUpdater.py + - python-yaml + - python-requests diff --git a/ansible/roles/ceph-grafana/files/cephmetrics-devel.repo b/ansible/roles/ceph-grafana/files/cephmetrics-devel.repo new file mode 120000 index 0000000..0b366fa --- /dev/null +++ b/ansible/roles/ceph-grafana/files/cephmetrics-devel.repo @@ -0,0 +1 @@ +../../../common/files/cephmetrics-devel.repo \ No newline at end of file diff --git a/ansible/roles/ceph-grafana/files/cephmetrics-prod.repo b/ansible/roles/ceph-grafana/files/cephmetrics-prod.repo new file mode 120000 index 0000000..9014d73 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/cephmetrics-prod.repo @@ -0,0 +1 @@ +../../../common/files/cephmetrics-prod.repo \ No newline at end of file diff --git a/ansible/roles/ceph-grafana/files/dashUpdater.py b/ansible/roles/ceph-grafana/files/dashUpdater.py new file mode 120000 index 0000000..a7871c4 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashUpdater.py @@ -0,0 +1 @@ 
+../../../../dashUpdater.py \ No newline at end of file diff --git a/ansible/roles/ceph-grafana/files/dashboards b/ansible/roles/ceph-grafana/files/dashboards new file mode 120000 index 0000000..bf20664 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards @@ -0,0 +1 @@ +../../../../dashboards/current \ No newline at end of file diff --git a/ansible/roles/ceph-grafana/files/grafana.ini b/ansible/roles/ceph-grafana/files/grafana.ini new file mode 120000 index 0000000..d27ea1e --- /dev/null +++ b/ansible/roles/ceph-grafana/files/grafana.ini @@ -0,0 +1 @@ +../../../../etc/grafana/grafana.ini \ No newline at end of file diff --git a/ansible/roles/ceph-grafana/files/grafana.list b/ansible/roles/ceph-grafana/files/grafana.list new file mode 100644 index 0000000..886da8d --- /dev/null +++ b/ansible/roles/ceph-grafana/files/grafana.list @@ -0,0 +1 @@ +deb https://packagecloud.io/grafana/stable/debian/ jessie main diff --git a/ansible/roles/ceph-grafana/handlers/main.yml b/ansible/roles/ceph-grafana/handlers/main.yml new file mode 100644 index 0000000..00adc91 --- /dev/null +++ b/ansible/roles/ceph-grafana/handlers/main.yml @@ -0,0 +1,36 @@ +--- +- name: Restart Grafana + service: + name: grafana-server + state: restarted + enabled: true + +- name: Resize whisper databases + # xargs -P parallelizes execution; scale it to the number of cores on the system + shell: "find /var/lib/carbon/whisper -name '*.wsp' -print0 | xargs -n1 -0 -P {{ ansible_processor_vcpus }} -I {} whisper-resize {} {{ whisper_retention.replace(',', ' ') }}" + register: whisper_resize + failed_when: "'Traceback' in whisper_resize.stdout" + no_log: true + +- name: Restart graphite-web + service: + # graphite-web is served by httpd + name: httpd + state: restarted + enabled: true + when: + - "{{ graphite.service == 'graphite-web' }}" + +- name: Restart graphite-api + service: + name: graphite-api + state: restarted + enabled: true + when: + - "{{ graphite.service == 'graphite-api' }}" + +- name: 
Restart carbon-cache + service: + name: carbon-cache + state: restarted + enabled: true diff --git a/ansible/roles/ceph-grafana/tasks/configure_carbon.yml b/ansible/roles/ceph-grafana/tasks/configure_carbon.yml new file mode 100644 index 0000000..bc37ccb --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/configure_carbon.yml @@ -0,0 +1,37 @@ +--- +- name: Set WHISPER_AUTOFLUSH to True + lineinfile: + dest: /etc/carbon/carbon.conf + regexp: "^WHISPER_AUTOFLUSH = .*" + insertafter: "^#.*buffering writes from the kernel.*" + line: "WHISPER_AUTOFLUSH = True" + notify: + - Restart carbon-cache + +- name: Fail when the first whisper retention value is not 10s + fail: + msg: "The first whisper retention value must be '10s', not '{{ whisper.retention[0][0] }}' in order to match collectd's interval" + failed_when: whisper.retention[0][0] != '10s' + any_errors_fatal: true + +- name: Set whisper_retention + set_fact: + whisper_retention: "{{ whisper.retention|map('join', ':')|join(',') }}" + +- name: Configure retention for collectd stats + template: + src: storage-schemas.conf + dest: /etc/carbon/storage-schemas.conf + notify: + - Resize whisper databases + - Restart carbon-cache + +- name: Ensure carbon storage has the right ownership + file: + path: "{{ carbon.storage_dir[ansible_pkg_mgr] }}" + state: directory + owner: "{{ carbon.unix_user[ansible_pkg_mgr] }}" + group: "{{ carbon.unix_user[ansible_pkg_mgr] }}" + recurse: yes + notify: + - Restart carbon-cache diff --git a/ansible/roles/ceph-grafana/tasks/configure_firewall.yml b/ansible/roles/ceph-grafana/tasks/configure_firewall.yml new file mode 100644 index 0000000..d36c62d --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/configure_firewall.yml @@ -0,0 +1,18 @@ +--- +- name: Check firewalld status + shell: "systemctl show firewalld | grep UnitFileState" + register: firewalld_status + failed_when: false + changed_when: false + +- name: Open ports for Grafana and Carbon + firewalld: + port: "{{ item }}" + zone: 
"{{ firewalld_zone }}" + state: enabled + immediate: true + permanent: true + with_items: + - 3000/tcp + - 2003/tcp + when: "'enabled' in firewalld_status.stdout" diff --git a/ansible/roles/ceph-grafana/tasks/configure_grafana.yml b/ansible/roles/ceph-grafana/tasks/configure_grafana.yml new file mode 100644 index 0000000..e8250a0 --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/configure_grafana.yml @@ -0,0 +1,134 @@ +--- +- name: Make sure grafana is down + service: + name: grafana-server + state: stopped + +- name: Wait for grafana to be stopped + wait_for: + port: 3000 + state: stopped + +- name: Write grafana.ini + copy: + src: files/grafana.ini + dest: /etc/grafana/grafana.ini + owner: root + group: grafana + mode: 0640 + tags: [ini] + +- name: Set domain in grafana.ini + lineinfile: + dest: /etc/grafana/grafana.ini + regexp: "^domain = .*" + insertafter: "^;domain = .*" + line: "domain = {{ ansible_fqdn }}" + tags: [ini] + +- name: Set admin_password in grafana.ini + lineinfile: + dest: /etc/grafana/grafana.ini + regexp: "^admin_password = .*" + insertafter: "^;admin_password = .*" + line: "admin_password = {{ grafana.admin_password }}" + no_log: true + tags: [ini] + +- include: grafana_plugins.yml + when: devel_mode + +- name: Enable and start grafana + service: + name: grafana-server + state: started + enabled: true + +- name: Wait for grafana to start + wait_for: + port: 3000 + +- name: Set grafana_data_source + set_fact: + grafana_data_source: > + { + "name":"{{ grafana.datasource }}", + "type":"graphite", + "url":"http://localhost:{{ graphite.web_port if graphite.service == 'graphite-web' else graphite.api_port }}", + "access":"proxy", + "basicAuth":false, + "isDefault":true + } + +- name: Add data source + uri: + url: http://localhost:3000/api/datasources + method: POST + user: admin + password: "{{ grafana.admin_password }}" + force_basic_auth: yes + body_format: json + body: "{{ grafana_data_source }}" + # If we get a 409 Conflict, it means we're 
already set up. We'll update + # after this. + status_code: 200,409 + register: grafana_data_source_result + +- name: Get datasource ID + uri: + url: "http://localhost:3000/api/datasources/id/{{ grafana.datasource }}" + method: GET + user: "{{ graphite.user }}" + password: "{{ grafana.admin_password }}" + force_basic_auth: yes + register: grafana_data_source_id + when: grafana_data_source_result is defined and grafana_data_source_result.status == 409 + +- name: Update datasource + uri: + url: "http://localhost:3000/api/datasources/{{ grafana_data_source_id.json.id }}" + method: PUT + user: "{{ graphite.user }}" + password: "{{ grafana.admin_password }}" + force_basic_auth: yes + body_format: json + body: "{{ grafana_data_source }}" + status_code: 200 + when: grafana_data_source_result is defined and grafana_data_source_result.status == 409 + +- name: Ship dashboard templates + copy: + src: files/dashboards + dest: /tmp/ + when: devel_mode + +- name: Ship dashUpdater.py + copy: + src: files/dashUpdater.py + dest: /tmp/dashUpdater.py + when: devel_mode + +- name: Write dashboard.yml + template: + src: dashboard.yml + dest: /tmp/dashboard.yml + mode: 0600 + +- name: Set dashupdate_cmd and dashboard_dir + set_fact: + dashupdate_cmd: "python /tmp/dashUpdater.py" + dashboard_dir: "/tmp/dashboards" + when: devel_mode + +- name: Set dashupdate_cmd and dashboard_dir + set_fact: + dashupdate_cmd: "/usr/libexec/cephmetrics/dashUpdater.py" + dashboard_dir: "/usr/share/cephmetrics/dashboards" + when: not devel_mode + +- name: Set dashupdate_mode + set_fact: + dashupdate_mode: "{{ 'refresh' if replace_dashboards else 'update' }}" + +- name: Push dashboards to Grafana + command: "{{ dashupdate_cmd }} -m {{ dashupdate_mode }} -c /tmp/dashboard.yml -D {{ dashboard_dir }}" diff --git a/ansible/roles/ceph-grafana/tasks/configure_graphite.yml b/ansible/roles/ceph-grafana/tasks/configure_graphite.yml new file mode 100644 index 0000000..e911108 --- /dev/null +++ 
b/ansible/roles/ceph-grafana/tasks/configure_graphite.yml @@ -0,0 +1,8 @@ +--- +- include: configure_graphite_web.yml + when: + - graphite.service == 'graphite-web' + +- include: configure_graphite_api.yml + when: + - graphite.service == 'graphite-api' diff --git a/ansible/roles/ceph-grafana/tasks/configure_graphite_api.yml b/ansible/roles/ceph-grafana/tasks/configure_graphite_api.yml new file mode 100644 index 0000000..2b85f2e --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/configure_graphite_api.yml @@ -0,0 +1,9 @@ +--- +- name: Write graphite-api systemd files + template: + src: "{{ item }}" + dest: "/etc/systemd/system/{{ item }}" + with_items: + - graphite-api.socket + - graphite-api.service + notify: Restart graphite-api diff --git a/ansible/roles/ceph-grafana/tasks/configure_graphite_web.yml b/ansible/roles/ceph-grafana/tasks/configure_graphite_web.yml new file mode 100644 index 0000000..b0ced52 --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/configure_graphite_web.yml @@ -0,0 +1,40 @@ +--- +- name: Create Graphite DB tables + command: /usr/bin/graphite-manage syncdb --noinput + become_user: "{{ graphite.unix_user[ansible_pkg_mgr] }}" + +- name: Build Graphite index + command: /usr/bin/graphite-build-index + become_user: "{{ graphite.unix_user[ansible_pkg_mgr] }}" + +- name: Set or unset port in main httpd config + lineinfile: + dest: /etc/httpd/conf/httpd.conf + regexp: "^Listen .*" + line: "Listen 80" + insertafter: "^#Listen .*" + state: "{{ 'present' if graphite.web_port == '80' else 'absent' }}" + notify: Restart graphite-web + +- name: Set port in graphite httpd config + lineinfile: + dest: /etc/httpd/conf.d/graphite-web.conf + regexp: "^Listen .*" + line: "Listen {{ graphite.web_port }}" + insertbefore: "^$" + replace: "" + notify: Restart graphite-web + +- name: Allow connecting to graphite without auth + lineinfile: + dest: /etc/httpd/conf.d/graphite-web.conf + line: " Require all granted" + insertafter: '' + state: present + notify: 
Restart graphite-web diff --git a/ansible/roles/ceph-grafana/tasks/grafana_plugins.yml b/ansible/roles/ceph-grafana/tasks/grafana_plugins.yml new file mode 100644 index 0000000..6ee13e6 --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/grafana_plugins.yml @@ -0,0 +1,20 @@ +--- +- name: Create Grafana plugins directory + file: + name: /var/lib/grafana/plugins + state: directory + +- name: Set grafana_plugins + set_fact: + grafana_plugins: "['vonage-status-panel', 'grafana-piechart-panel']" + +- name: Install Grafana plugins + command: "grafana-cli plugins install {{ item }}" + with_items: "{{ grafana_plugins }}" + +- name: Update Grafana plugins + command: "grafana-cli plugins update {{ item }}" + with_items: "{{ grafana_plugins }}" + +- name: Update status-panel for readability within the 'light' theme + command: "sed -i.bak -e 's/green/rgb(1,167,1)/g' /var/lib/grafana/plugins/vonage-status-panel/dist/css/status_panel.css" diff --git a/ansible/roles/ceph-grafana/tasks/install_packages.yml b/ansible/roles/ceph-grafana/tasks/install_packages.yml new file mode 100644 index 0000000..9dfdbeb --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/install_packages.yml @@ -0,0 +1,13 @@ +--- +- name: Install packages + package: + name: "{{ item }}" + state: latest + with_items: "{{ devel_packages[ansible_pkg_mgr] }}" + when: devel_mode + +- name: Install cephmetrics + package: + name: cephmetrics + state: latest + when: not devel_mode diff --git a/ansible/roles/ceph-grafana/tasks/main.yml b/ansible/roles/ceph-grafana/tasks/main.yml new file mode 100644 index 0000000..3402bdf --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/main.yml @@ -0,0 +1,49 @@ +--- +- include: merge_vars.yml + tags: + - always + +- include: setup_repos.yml + when: + - not containerized_deployment + tags: + - packages + - repos + +- include: install_packages.yml + when: not containerized_deployment + tags: + - packages + +- include: configure_firewall.yml + tags: + - firewall + +- include: 
configure_carbon.yml + when: not containerized_deployment + tags: + - carbon + +- include: configure_graphite.yml + when: not containerized_deployment + tags: + - graphite + +- include: configure_grafana.yml + when: not containerized_deployment + tags: + - grafana + +- name: Reload systemd + systemd: + daemon_reload: yes + # Even when just calling daemon-reload, ansible < 2.4 requires a name for this call + name: grafana-server + tags: + - packages + - services + +- include: start_services.yml + when: not containerized_deployment + tags: + - services diff --git a/ansible/roles/ceph-grafana/tasks/merge_vars.yml b/ansible/roles/ceph-grafana/tasks/merge_vars.yml new file mode 100644 index 0000000..649ebf1 --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/merge_vars.yml @@ -0,0 +1,5 @@ +--- +- name: Combine default settings and user-defined variables + set_fact: {"{{ item }}": "{% if vars[item] is not defined %}{{ defaults[item] }}{% endif %}{% if vars[item] is mapping %}{{ defaults[item]|combine(vars[item]|default({})) }}{% endif %}"} + with_items: "{{ defaults.keys() }}" + no_log: true diff --git a/ansible/roles/ceph-grafana/tasks/setup_repos.yml b/ansible/roles/ceph-grafana/tasks/setup_repos.yml new file mode 100644 index 0000000..e793626 --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/setup_repos.yml @@ -0,0 +1,83 @@ +--- +- name: Enable EPEL + yum: + name: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm" + state: "{{ 'present' if use_epel else 'absent' }}" + when: + - ansible_pkg_mgr == "yum" + - devel_mode + +- name: Install cephmetrics development repo + copy: + src: files/cephmetrics-devel.repo + dest: /etc/yum.repos.d/cephmetrics.repo + when: + - ansible_pkg_mgr == "yum" + - not use_epel + - devel_mode + +- name: Remove cephmetrics repo + file: + path: /etc/yum.repos.d/cephmetrics.repo + state: absent + when: + - ansible_pkg_mgr == "yum" + - use_epel + - devel_mode + +- name: Add Grafana 
repo + template: + src: grafana.repo + dest: /etc/yum.repos.d/grafana.repo + when: + - ansible_pkg_mgr == 'yum' + - use_epel + - devel_mode + +- name: Remove Grafana repo + file: + path: /etc/yum.repos.d/grafana.repo + state: absent + when: + - ansible_pkg_mgr == "yum" + - not use_epel + - devel_mode + +- name: Install grafana repo + copy: + src: files/grafana.list + dest: /etc/apt/sources.list.d/grafana.list + when: + - ansible_pkg_mgr == "apt" + - devel_mode + +- name: Add packagecloud GPG key to apt + apt_key: + # This is the key used by the grafana repo + url: https://packagecloud.io/gpg.key + id: D59097AB + state: present + when: + - ansible_pkg_mgr == "apt" + - devel_mode + +- name: Update apt cache + apt: + update_cache: true + when: + - ansible_pkg_mgr == 'apt' + - devel_mode + +- name: Enable subscription-manager repos + command: "subscription-manager repos{% for repo in rhsm_repos %} --enable={{ repo }}{% endfor %}" + when: + - ansible_pkg_mgr == "yum" + - not devel_mode + +- name: Install cephmetrics production repo + copy: + src: files/cephmetrics-prod.repo + dest: /etc/yum.repos.d/cephmetrics.repo + when: + - ansible_pkg_mgr == "yum" + - not devel_mode diff --git a/ansible/roles/ceph-grafana/tasks/start_services.yml b/ansible/roles/ceph-grafana/tasks/start_services.yml new file mode 100644 index 0000000..a1c6552 --- /dev/null +++ b/ansible/roles/ceph-grafana/tasks/start_services.yml @@ -0,0 +1,11 @@ +--- +- name: Enable and start services + service: + name: "{{ item }}" + state: started + enabled: true + with_items: + - carbon-cache + # graphite-web is served by httpd + - "{{ 'httpd' if graphite.service == 'graphite-web' else graphite.service }}" + - grafana-server diff --git a/ansible/roles/ceph-grafana/templates/dashboard.yml b/ansible/roles/ceph-grafana/templates/dashboard.yml new file mode 100644 index 0000000..87f2759 --- /dev/null +++ b/ansible/roles/ceph-grafana/templates/dashboard.yml @@ -0,0 +1,23 @@ +--- +osd_servers: [{% for fqdn in 
groups['osds'] | default([]) %}{{ fqdn.split('.')[0] }},{% endfor %}] +rgw_servers: [{% for fqdn in groups['rgws'] | default([]) %}{{ fqdn.split('.')[0] }},{% endfor %}] +domain: "{{ ansible_domain }}" +_dashboards: + - alert-status + - ceph-at-a-glance + - ceph-backend-storage + - ceph-cluster + - ceph-osd-information + - ceph-pools + - ceph-rgw-workload + - disk-busy-by-server + - iops-by-server + - latency-by-server + - network-usage-by-node + - osd-node-detail +_credentials: + user: admin + password: {{ grafana.admin_password }} +_grafana_port: 3000 +_home_dashboard: ceph-at-a-glance +_alert_dashboard: alert-status diff --git a/ansible/roles/ceph-grafana/templates/grafana.repo b/ansible/roles/ceph-grafana/templates/grafana.repo new file mode 100644 index 0000000..1ba7fb6 --- /dev/null +++ b/ansible/roles/ceph-grafana/templates/grafana.repo @@ -0,0 +1,9 @@ +[grafana] +name=grafana +baseurl=https://packagecloud.io/grafana/stable/el/{{ ansible_distribution_major_version }}/$basearch +repo_gpgcheck=1 +enabled=1 +gpgcheck=1 +gpgkey=https://packagecloud.io/gpg.key https://grafanarel.s3.amazonaws.com/RPM-GPG-KEY-grafana +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt diff --git a/ansible/roles/ceph-grafana/templates/graphite-api.service b/ansible/roles/ceph-grafana/templates/graphite-api.service new file mode 100644 index 0000000..446efc8 --- /dev/null +++ b/ansible/roles/ceph-grafana/templates/graphite-api.service @@ -0,0 +1,15 @@ +[Unit] +Description=Graphite-API service +Requires=graphite-api.socket + +[Service] +ExecStart=/usr/bin/gunicorn3 -w2 graphite_api.app:app +Restart=on-failure +User={{ graphite.unix_user[ansible_pkg_mgr] }} +Group={{ graphite.unix_user[ansible_pkg_mgr] }} +ExecReload=/bin/kill -s HUP $MAINPID +ExecStop=/bin/kill -s TERM $MAINPID +PrivateTmp=true + +[Install] +WantedBy=multi-user.target diff --git a/ansible/roles/ceph-grafana/templates/graphite-api.socket b/ansible/roles/ceph-grafana/templates/graphite-api.socket new file mode 
100644 index 0000000..f173e7b --- /dev/null +++ b/ansible/roles/ceph-grafana/templates/graphite-api.socket @@ -0,0 +1,9 @@ +[Unit] +Description=graphite-api socket + +[Socket] +ListenStream=/run/graphite-api.sock +ListenStream=127.0.0.1:{{ graphite.api_port }} + +[Install] +WantedBy=sockets.target diff --git a/ansible/roles/ceph-grafana/templates/storage-schemas.conf b/ansible/roles/ceph-grafana/templates/storage-schemas.conf new file mode 100644 index 0000000..5237b32 --- /dev/null +++ b/ansible/roles/ceph-grafana/templates/storage-schemas.conf @@ -0,0 +1,19 @@ +# Schema definitions for Whisper files. Entries are scanned in order, +# and first match wins. This file is scanned for changes every 60 seconds. +# +# [name] +# pattern = regex +# retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ... +[collectd] +pattern = ^collectd\. +retentions = {{ whisper_retention }} + +# Carbon's internal metrics. This entry should match what is specified in +# CARBON_METRIC_PREFIX and CARBON_METRIC_INTERVAL settings +[carbon] +pattern = ^carbon\. 
+retentions = 60:90d + +[default_1min_for_1day] +pattern = .* +retentions = 60s:1d diff --git a/cephmetrics.py b/cephmetrics.py new file mode 100644 index 0000000..bcd3737 --- /dev/null +++ b/cephmetrics.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python +import os +import glob +import logging +import collectd + +from collectors.mon import Mon +from collectors.rgw import RGW +from collectors.osd import OSDs +from collectors.common import flatten_dict, get_hostname, freadlines + +__author__ = 'Paul Cuzner' + +PLUGIN_NAME = 'cephmetrics' + + +class Ceph(object): + def __init__(self): + self.cluster_name = None + self.host_name = get_hostname() + + self.mon_socket = None + self.rgw_socket = None + + self.mon = None + self.rgw = None + self.osd = None + + def probe(self): + """ + set up which collector(s) to use, based on what types of sockets we + find in /var/run/ceph + """ + + mon_socket = '/var/run/ceph/{}-mon.{}.asok'.format(self.cluster_name, + self.host_name) + if os.path.exists(mon_socket): + self.mon_socket = mon_socket + self.mon = Mon(self.cluster_name, + admin_socket=mon_socket) + + rgw_socket_list = glob.glob('/var/run/ceph/{}-client.rgw.*.' 
def configure_callback(conf):
    """
    collectd configuration callback for the cephmetrics plugin.

    Reads the plugin settings from the collectd config node, validates
    them, then records the cluster name on the module-level CEPH object,
    sets up file logging and probes the host for ceph roles.

    Recognised settings:
      LogLevel    - 'debug' or 'info' (defaults to 'debug')
      ClusterName - required; /etc/ceph/<ClusterName>.conf must exist

    :param conf: collectd config object; each child supplies a key and a
                 tuple of values, of which only the first value is used
    :return: None
    """

    valid_log_levels = ['debug', 'info']

    module_parms = {node.key: node.values[0] for node in conf.children}

    log_level = module_parms.get('LogLevel', 'debug')
    if log_level not in valid_log_levels:
        collectd.error("LogLevel specified is invalid - must"
                       " be :{}".format(' or '.join(valid_log_levels)))
        # fall back to the documented default rather than handing an
        # unknown level name to the logging setup
        log_level = 'debug'

    if 'ClusterName' not in module_parms:
        collectd.error("ClusterName is required")
        return

    cluster_name = module_parms['ClusterName']

    # cluster name is all we need to get started
    if not os.path.exists('/etc/ceph/{}.conf'.format(cluster_name)):
        collectd.error("Clustername given ('{}') not found in "
                       "/etc/ceph".format(cluster_name))
        # without the conf file there is nothing usable to probe, so stop
        # here instead of carrying on with a cluster we cannot talk to
        return

    # let's assume the conf file is OK to use
    CEPH.cluster_name = cluster_name

    setup_module_logging(log_level)

    CEPH.probe()
+Requires(postun): policycoreutils + +Requires: graphite-web +Requires: python-carbon +Requires: cephmetrics-grafana-plugins = %{version}-%{release} +Requires: cephmetrics-collectors = %{version}-%{release} + +%description +The monitoring service with web frontend for Ceph storage clusters providing several statistical data graphed by grafana. + + +%package grafana-plugins +Summary: Vonage plugin for graphana +Requires: grafana +License: ASL 2.0 +%description grafana-plugins +The vonage status panel and piechart panel for grafana web server. + + +%package collectors +Summary: Ceph metrics collectors +Requires: collectd +Requires: collectd-python +%description collectors +The collectors for Ceph implemented with help of statistics collection daemon collectd. + + +%package ansible +Summary: Ansible playbooks for Ceph metrics +Requires: ceph-ansible +%description ansible +Ansible playbooks for Ceph metrics + + +%prep +%setup -q +unzip %SOURCE1 +mv -f Vonage* cephmetrics-vonage +unzip %SOURCE2 +mv -f grafana-piechart-panel* cephmetrics-piechart + + +%build +make -f /usr/share/selinux/devel/Makefile cephmetrics.pp + +# Change the devel_mode defaults +sed -i -e 's/devel_mode: true/devel_mode: false/' ansible/roles/*/defaults/main.yml + +# Support light mode better +sed -i -e 's/green/rgb(1,167,1)/g' cephmetrics-vonage/dist/css/status_panel.css + + +%install +# Install dashUpdater.py +install -d %{buildroot}%{_libexecdir}/cephmetrics +install -m 755 dashUpdater.py %{buildroot}%{_libexecdir}/cephmetrics/ +install -d %{buildroot}%{_datadir}/cephmetrics/dashboards +install -m 644 dashboards/current/* %{buildroot}%{_datadir}/cephmetrics/dashboards/ + +# Install vonage and piechart plugin +install -d %{buildroot}%{_localstatedir}/lib/grafana/plugins/ +cp -r cephmetrics-vonage %{buildroot}%{_localstatedir}/lib/grafana/plugins/ +cp -r cephmetrics-piechart %{buildroot}%{_localstatedir}/lib/grafana/plugins/ + +# Install collectors +install -d 
%{buildroot}%{_libdir}/collectd/cephmetrics/collectors +install -m 755 cephmetrics.py %{buildroot}%{_libdir}/collectd/cephmetrics +install -m 644 collectors/* %{buildroot}%{_libdir}/collectd/cephmetrics/collectors + +# Install ansible playbooks +install -d %{buildroot}%{_datadir} +cp -L -r ansible %{buildroot}%{_datadir}/cephmetrics-ansible + +# Install SELinux +install -d %{buildroot}%{_datadir}/selinux/packages +install -m 644 cephmetrics.pp %{buildroot}%{_datadir}/selinux/packages/cephmetrics.pp +exit 0 + + +%files +%{_libexecdir}/cephmetrics/dashUpdater.py +%{_datadir}/cephmetrics +%doc dashboard.yml +%doc etc/grafana +%doc LICENSE +%doc README + +%files grafana-plugins +%{_localstatedir}/lib/grafana/plugins/cephmetrics-vonage +%{_localstatedir}/lib/grafana/plugins/cephmetrics-piechart + +%files collectors +%{_libdir}/collectd/cephmetrics +%doc etc/collectd.conf +%doc etc/collectd.d +%{_datadir}/selinux/packages/cephmetrics.pp + +%post collectors +/usr/sbin/semodule -i %{_datadir}/selinux/packages/cephmetrics.pp &> /dev/null || : + +%postun collectors +if [ $1 == 0 ] ; then + /usr/sbin/semodule -r cephmetrics &> /dev/null || : +fi + +%files ansible +%{_datadir}/cephmetrics-ansible + +%changelog diff --git a/collectors/__init__.py b/collectors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/collectors/base.py b/collectors/base.py new file mode 100644 index 0000000..f2f2295 --- /dev/null +++ b/collectors/base.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +import json +import time +import logging + +from ceph_daemon import admin_socket + + +class BaseCollector(object): + + def __init__(self, cluster_name, admin_socket=None): + self.cluster_name = cluster_name + self.admin_socket = admin_socket + + self.logger = logging.getLogger('cephmetrics') + + def _admin_socket(self, cmds=None, socket_path=None): + + adm_socket = self.admin_socket if not socket_path else socket_path + + if not cmds: + cmds = ['perf', 'dump'] + + start = time.time() + 
def todict(obj):
    """
    convert an object to a dict representation

    Attributes whose name starts with '_' are skipped. Each remaining
    value is converted recursively; a value without a __dict__ (numbers,
    strings, lists, dicts ...) is stored unchanged.

    :param obj: (object) object to examine, to extract variables/values from
    :return: (dict) representation of the given object
    :raises AttributeError: if obj itself has no __dict__
    """
    data = {}
    # items() behaves the same on python 2 and 3, iteritems() is py2 only.
    # NB. obj.__dict__ (not vars()) is deliberate: the recursion relies on
    # the AttributeError raised for values that carry no __dict__
    for key, value in obj.__dict__.items():

        if key.startswith('_'):
            continue

        try:
            data[key] = todict(value)
        except AttributeError:
            # plain value - keep it as is
            data[key] = value

    return data
class IOstat(object):
    """
    I/O statistics for one block device, derived from two successive
    samples of cumulative counters.

    The caller stores the raw counter fields for the device in `_current`
    (moving the prior sample to `_previous`) and then calls compute() with
    the number of seconds between the samples. The counters are taken
    positionally, in the order given by `raw_metrics`.
    """

    # names given to the raw counters, in the order they appear in a sample
    raw_metrics = [
        "_reads",
        "_reads_mrgd",
        "_sectors_read",
        "_read_ms",
        "_writes",
        "_writes_mrgd",
        "_sectors_written",
        "_write_ms",
        "_current_io",
        "_ms_active_io",
        "_ms_active_io_w"
    ]

    # multiplier used to turn sector counts into bytes
    sector_size = 512

    # metric name -> (published name, collectd data type)
    metrics = {
        "iops": ("iops", "gauge"),
        "r_iops": ("r_iops", "gauge"),
        "w_iops": ("w_iops", "gauge"),
        "bytes_per_sec": ("bytes_per_sec", "gauge"),
        "r_bytes_per_sec": ("r_bytes_per_sec", "gauge"),
        "w_bytes_per_sec": ("w_bytes_per_sec", "gauge"),
        "util": ("util", "gauge"),
        "await": ("await", "gauge"),
        "r_await": ("r_await", "gauge"),
        "w_await": ("w_await", "gauge"),
    }

    def __init__(self):
        self._previous = []
        self._current = []

        # Seed the metrics we're interested in
        for ctr in IOstat.metrics:
            setattr(self, ctr, 0)

    def __str__(self):
        rows = ['\t{} ... {}\n'.format(key, getattr(self, key))
                for key in sorted(vars(self))]
        return '\n- IOstat object:\n' + ''.join(rows)

    def _calc_raw_delta(self):
        """
        Set each raw counter attribute to the change between the previous
        and current sample, or to 0 when there is no previous sample yet
        """
        if not self._previous:
            # nothing to compute yet
            for key in IOstat.raw_metrics:
                setattr(self, key, 0)
            return

        for ptr, key in enumerate(IOstat.raw_metrics):
            setattr(self, key, (int(self._current[ptr]) -
                                int(self._previous[ptr])))

    def compute(self, sample_interval):
        """
        Calculate the iostats for this device

        :param sample_interval: (int/float) seconds between the previous
               and current sample. When it is not positive only the raw
               deltas are refreshed and the published metrics keep their
               prior values
        :return: None
        """

        self._calc_raw_delta()

        if sample_interval <= 0:
            return

        interval_ms = sample_interval * 1000
        total_io = self._reads + self._writes

        self.util = float(self._ms_active_io) / interval_ms * 100

        # float() so python 2 does not truncate a fractional rate to an int
        self.iops = float(total_io) / sample_interval
        self.r_iops = float(self._reads) / sample_interval
        self.w_iops = float(self._writes) / sample_interval

        # 'await' is a reserved word from python 3.7, so the attribute can
        # no longer be assigned with dot notation. setattr keeps the
        # published metric name unchanged.
        setattr(self, 'await',
                float(self._write_ms + self._read_ms) / total_io
                if total_io > 0 else 0)
        self.w_await = float(
            self._write_ms) / self._writes if self._writes > 0 else 0
        self.r_await = float(
            self._read_ms) / self._reads if self._reads > 0 else 0

        self.r_bytes_per_sec = (float(
            self._sectors_read * IOstat.sector_size)) / sample_interval
        self.w_bytes_per_sec = (float(
            self._sectors_written * IOstat.sector_size)) / sample_interval
        self.bytes_per_sec = self.r_bytes_per_sec + self.w_bytes_per_sec
@staticmethod
def get_base_dev(dev_name):
    """
    Derive the name of the parent device from a device/partition name

    :param dev_name: (str) device name without the /dev/ prefix
    :return: (str) name of the base device
             intelcas* - returned unchanged
             nvme*     - everything before the first 'p' (nvme0n1p1 ->
                         nvme0n1), unchanged when there is no 'p'
             other     - only the alphabetic characters (sdaa1 -> sdaa)
    """

    # for intelcas devices, just use the device name as is
    if dev_name.startswith('intelcas'):
        return dev_name

    if dev_name.startswith('nvme'):
        if 'p' in dev_name:
            return dev_name[:dev_name.index('p')]
        return dev_name

    # default strip any numeric ie. sdaa1 -> sdaa
    # join() always yields a string; filter() only does so on python 2 and
    # returns an iterator on python 3
    return ''.join(ch for ch in dev_name if ch.isalpha())
+ cluster_metrics = { + "num_mon": ("num_mon", "gauge"), + "num_mon_quorum": ("num_mon_quorum", "gauge"), + "num_rbds": ("num_rbds", "gauge"), + "num_osd_hosts": ("num_osd_hosts", "gauge"), + "num_osd": ("num_osd", "gauge"), + "num_osd_up": ("num_osd_up", "gauge"), + "num_osd_in": ("num_osd_in", "gauge"), + "osd_epoch": ("osd_epoch", "gauge"), + "osd_bytes": ("osd_bytes", "gauge"), + "osd_bytes_used": ("osd_bytes_used", "gauge"), + "osd_bytes_avail": ("osd_bytes_avail", "gauge"), + "num_pool": ("num_pool", "gauge"), + "num_pg": ("num_pg", "gauge"), + "num_pg_active_clean": ("num_pg_active_clean", "gauge"), + "num_pg_active": ("num_pg_active", "gauge"), + "num_pg_peering": ("num_pg_peering", "gauge"), + "num_object": ("num_object", "gauge"), + "num_object_degraded": ("num_object_degraded", "gauge"), + "num_object_misplaced": ("num_object_misplaced", "gauge"), + "num_object_unfound": ("num_object_unfound", "gauge"), + "num_bytes": ("num_bytes", "gauge"), + "num_mds_up": ("num_mds_up", "gauge"), + "num_mds_in": ("num_mds_in", "gauge"), + "num_mds_failed": ("num_mds_failed", "gauge"), + "mds_epoch": ("mds_epoch", "gauge"), + "health": ("health", "gauge") + } + + pool_client_metrics = { + 'bytes_sec': ("bytes_sec", "gauge"), + 'op_per_sec': ("op_per_sec", "gauge"), + 'read_bytes_sec': ("read_bytes_sec", "gauge"), + 'write_op_per_sec': ("write_op_per_sec", "gauge"), + 'write_bytes_sec': ("write_bytes_sec", "gauge"), + 'read_op_per_sec': ("read_op_per_sec", "gauge") + } + + pool_recovery_metrics = { + "recovering_objects_per_sec": ("recovering_objects_per_sec", "gauge"), + "recovering_bytes_per_sec": ("recovering_bytes_per_sec", "gauge"), + "recovering_keys_per_sec": ("recovering_keys_per_sec", "gauge"), + "num_objects_recovered": ("num_objects_recovered", "gauge"), + "num_bytes_recovered": ("num_bytes_recovered", "gauge"), + "num_keys_recovered": ("num_keys_recovered", "gauge") + } + + osd_metrics = { + "status": ("status", "gauge") + } + + mon_states = { + "mon_status": 
("mon_status", "gauge") + } + + all_metrics = merge_dicts(pool_recovery_metrics, pool_client_metrics) + all_metrics = merge_dicts(all_metrics, cluster_metrics) + all_metrics = merge_dicts(all_metrics, osd_metrics) + all_metrics = merge_dicts(all_metrics, mon_states) + + def __init__(self, *args, **kwargs): + BaseCollector.__init__(self, *args, **kwargs) + self.version = self._get_version() + if self.version < 12: + self.get_mon_health = self._mon_health + else: + self.get_mon_health = self._mon_health_new + + def _get_version(self): + vers_info = self._mon_command('version') + return int(vers_info['version'].replace('.', ' ').split()[2]) + + def _mon_command(self, cmd_request): + """ Issue a command to the monitor """ + + buf_s = '{}' + conf_file = "/etc/ceph/{}.conf".format(self.cluster_name) + + start = time.time() + with rados.Rados(conffile=conf_file) as cluster: + cmd = {'prefix': cmd_request, 'format': 'json'} + rc, buf_s, out = cluster.mon_command(json.dumps(cmd), b'') + end = time.time() + + self.logger.debug("_mon_command call '{}' :" + " {:.3f}s".format(cmd_request, + (end - start))) + + return json.loads(buf_s) + + @staticmethod + def get_feature_state(summary_data, pg_states): + """ + Look at the summary list to determine the state of RADOS features + :param summary_data: (list) summary data from a ceph health command + :return: (dict) dict indexed by feature + 0 Inactive, 1 Active, 2 Disabled + """ + feature_lookup = {"noscrub": "scrub", + "nodeep-scrub": "deep_scrub", + "norecover": "recovery", + "nobackfill": "backfill", + "norebalance": "rebalance", + "noout": "out", + "nodown": "down"} + + # Start with all features inactive i.e. 
@classmethod
def check_stuck_pgs(cls, summary_list):
    """
    Extract the number of stuck/inactive pgs from a health summary list

    Only entries with a severity of HEALTH_ERR whose summary text contains
    all of the words 'pgs', 'stuck' and 'inactive' are considered. The
    count is taken from the first word of that text. When several entries
    match, the last one wins.

    :param summary_list: (list) of dicts holding 'severity' and 'summary'
    :return: (int) number of stuck pgs, 0 if none are reported
    """
    bad_pg_words = ['pgs', 'stuck', 'inactive']
    stuck_pgs = 0

    for summary_data in summary_list:
        if summary_data.get('severity') != 'HEALTH_ERR':
            continue

        # an entry without any summary text must not abort the scan
        summary_text = summary_data.get('summary') or ''
        if not all(trigger in summary_text for trigger in bad_pg_words):
            continue

        try:
            stuck_pgs = int(summary_text.split()[0])
        except ValueError:
            # text mentions stuck pgs but doesn't lead with a count, so
            # there is no number to report from this entry
            continue

    return stuck_pgs
def display_names(self, metric_format, metrics):
    """
    convert the keys to the static descriptions

    :param metric_format: (dict) source key -> (published name, data type)
    :param metrics: (dict) source key -> value, may be None or empty
    :return: (dict) published name -> value. A source key that has no
             entry in metric_format keeps its own name, so an unexpected
             counter does not raise a KeyError
    """
    if not metrics:
        return {}

    return {metric_format.get(k, (k,))[0]: metrics[k]
            for k in metrics}
@staticmethod
def _select_pools(pools, mons):
    """
    determine the pools this mon should scan based on its name. We select
    pools from an offset into the pool list, and then repeat at an
    interval set by # mons in the configuration. This splits up the pools
    we have, so each mon looks at a discrete set of pools instead of all
    mons performing all scans.
    :param pools: (list) rados pool names
    :param mons: (list) monitor names from ceph health
    :return: (list) of pools this monitor should scan. empty list if the
             monitor name mismatches - so no scans done
    """

    try:
        offset = mons.index(get_hostname())
    except ValueError:
        # this host's name is not in the monitor list?
        # twilight zone moment
        return []

    # extended slice: every len(mons)'th pool starting at this mon's
    # position. Same selection as the former xrange() loop, without
    # relying on a python 2 only builtin
    return list(pools[offset::len(mons)])
class OSDstats(object):
    """
    Latency and capacity figures for a single OSD, built from successive
    samples of the daemon's cumulative perf counters.
    """

    # capacity counters copied from the 'osd' section of a sample
    # source name -> (published name, collectd data type)
    osd_capacity = {
        "stat_bytes": ("stat_bytes", "gauge"),
        "stat_bytes_used": ("stat_bytes_used", "gauge"),
        "stat_bytes_avail": ("stat_bytes_avail", "gauge")
    }

    # latency counters from the 'filestore' section, each one a dict
    # holding a cumulative 'sum' and 'avgcount'
    filestore_metrics = {
        "journal_latency",
        "commitcycle_latency",
        "apply_latency",
        "queue_transaction_latency_avg"
    }

    def __init__(self, osd_type='filestore'):
        self._current = {}
        self._previous = {}
        self._osd_type = osd_type
        self.osd_percent_used = 0

    def update(self, stats):
        """
        update the objects attributes based on the dict
        :param stats: (dict) containing filestore performance ('filestore')
                      and capacity info ('osd')
        :return: None
        """

        if self._current:
            self._previous = self._current
        self._current = stats['filestore']

        for attr in OSDstats.filestore_metrics:
            # 0 is reported until two samples exist, and whenever the
            # counters did not advance between the samples
            val = 0

            if self._previous:
                now = self._current.get(attr) or {}
                then = self._previous.get(attr) or {}
                d_sum = (now.get('sum') or 0) - (then.get('sum') or 0)
                d_avgcount = ((now.get('avgcount') or 0) -
                              (then.get('avgcount') or 0))

                # a negative delta means the cumulative counters restarted
                # so there is no meaningful average for this interval
                if d_sum > 0 and d_avgcount > 0:
                    val = float(d_sum) / d_avgcount

            setattr(self, attr, val)

        for attr in stats['osd']:
            setattr(self, attr, stats['osd'].get(attr))

        # guard against a zero or missing capacity figure - dividing by
        # it would abort the whole stats refresh
        total = getattr(self, 'stat_bytes', 0)
        used = getattr(self, 'stat_bytes_used', 0) or 0
        if total:
            self.osd_percent_used = math.ceil((float(used) / total) * 100)
        else:
            self.osd_percent_used = 0
@staticmethod
def get_osd_type(osd_path):
    """
    Work out the object store type used by the OSD mounted at osd_path

    :param osd_path: (str) directory holding the OSD's files
    :return: (str) contents of the 'type' file when there is one,
             otherwise "filestore" if a 'journal' entry exists
    :raises ValueError: when neither 'type' nor 'journal' is present
    """

    type_file = os.path.join(osd_path, 'type')
    if os.path.exists(type_file):
        return fread(type_file)

    # no type file - a journal entry identifies the OSD as filestore
    journal = os.path.join(osd_path, 'journal')
    if not os.path.exists(journal):
        raise ValueError("Unrecognised OSD type")

    return "filestore"
+ + osd_indicators = {'var', 'lib', 'osd'} + + for mnt in freadlines('/proc/mounts'): + items = mnt.split(' ') + dev_path, path_name = items[:2] + if path_name.startswith('/var/lib'): + # take a close look since this is where ceph osds usually + # get mounted + dirs = set(path_name.split('/')) + if dirs.issuperset(osd_indicators): + + # get the osd_id from the name is the most simple way + # to get the id, due to naming conventions. If this fails + # though, plan 'b' is the whoami file + osd_id = path_name.split('-')[-1] + if not osd_id.isdigit(): + osd_id = fread(os.path.join(path_name, 'whoami')) + + if osd_id not in self.osd: + osd_type = OSDs.get_osd_type(path_name) + self.osd[osd_id] = OSDstats(osd_type=osd_type) + self.osd_id_list.append(osd_id) + + osd_type = self.osd[osd_id]._osd_type + if osd_type == 'filestore': + if dev_path.startswith('/dev/mapper'): + encrypted = 1 + uuid = dev_path.split('/')[-1] + partuuid = '/dev/disk/by-partuuid/{}'.format(uuid) + dev_path = os.path.realpath(partuuid) + osd_device = dev_path.split('/')[-1] + else: + encrypted = 0 + osd_device = dev_path.split('/')[-1] + + elif osd_type == 'bluestore': + block_link = os.path.join(path_name, 'block') + osd_path = os.path.realpath(block_link) + osd_device = osd_path.split('/')[-1] + encrypted = 0 + else: + raise ValueError("Unknown OSD type encountered") + + # if the osd_id hasn't been seem neither has the + # disk + self.osd[osd_device] = Disk(osd_device, + path_name=path_name, + osd_id=osd_id, + in_osd_type=osd_type, + encrypted=encrypted) + self.dev_lookup[osd_device] = 'osd' + self.osd_count += 1 + + if osd_type == 'filestore': + journal_link = os.path.join(path_name, 'journal') + else: + journal_link = os.path.join(path_name, 'block.wal') + + if os.path.exists(journal_link): + link_tgt = os.readlink(journal_link) + if link_tgt.startswith('/dev/mapper'): + encrypted = 1 + else: + encrypted = 0 + + partuuid_path = os.path.join('/dev/disk/by-partuuid', + link_tgt.split('/')[-1]) + 
jrnl_path = os.path.realpath(partuuid_path) + jrnl_dev = jrnl_path.split('/')[-1] + + if jrnl_dev not in self.osd: + self.jrnl[jrnl_dev] = Disk(jrnl_dev, + osd_id=osd_id, + in_osd_type=osd_type, + encrypted=encrypted) + + self.dev_lookup[jrnl_dev] = 'jrnl' + + else: + # No journal or WAL link..? + pass + + def _stats_lookup(self): + """ + Grab the disk stats from /proc/diskstats, and the key osd perf dump + counters + """ + + now = time.time() + interval = int(now) - self.timestamp + self.timestamp = int(now) + + # Fetch diskstats from the OS + for perf_entry in freadlines('/proc/diskstats'): + + field = perf_entry.split() + dev_name = field[2] + + device = None + if self.dev_lookup.get(dev_name, None) == 'osd': + device = self.osd[dev_name] + elif self.dev_lookup.get(dev_name, None) == 'jrnl': + device = self.jrnl[dev_name] + + if device: + new_stats = field[3:] + + if device.perf._current: + device.perf._previous = device.perf._current + device.perf._current = new_stats + else: + device.perf._current = new_stats + + device.perf.compute(interval) + device.refresh() + + end = time.time() + self.logger.debug("OS disk stats calculated in " + "{:.4f}s".format(end-now)) + + # fetch stats from each osd daemon + osd_stats_start = time.time() + for osd_id in self.osd_id_list: + + if self.osd[osd_id]._osd_type == 'filestore': + osd_stats = self._fetch_osd_stats(osd_id) + + # self.logger.debug('stats : {}'.format(osd_stats)) + + osd_device = self.osd[osd_id] + osd_device.update(osd_stats) + else: + self.logger.debug("skipped 'bluestore' osd perf collection " + "for osd.{}".format(osd_id)) + + osd_stats_end = time.time() + self.logger.debug("OSD perf dump stats collected for {} OSDs " + "in {:.3f}s".format(len(self.osd_id_list), + (osd_stats_end - osd_stats_start))) + + @staticmethod + def _dump_devs(device_dict): + + dumped = {} + + for dev_name in sorted(device_dict): + device = device_dict[dev_name] + dumped[dev_name] = todict(device) + + return dumped + + def dump(self): 
class RGW(BaseCollector):
    """
    Collector for the RADOS gateway (RGW) daemon running on this host.

    The statistics come from the admin socket response, from the section
    named 'client.rgw.<host_name>'.
    """

    # counter name -> (name, type) pairs
    # NOTE(review): presumably (published metric name, collectd type) -
    # confirm against the code that consumes these tables
    simple_metrics = {
        "req": ("requests", "derive"),
        "failed_req": ("requests_failed", "derive"),
        "get": ("gets", "derive"),
        "get_b": ("get_bytes", "derive"),
        "put": ("puts", "derive"),
        "put_b": ("put_bytes", "derive"),
        "qlen": ("qlen", "derive"),
        "qactive": ("requests_active", "derive")
    }

    # counters whose value is itself a dict; _filter flattens each of them
    # into '<counter>_<attribute>' entries
    int_latencies = [
        "get_initial_lat",
        "put_initial_lat"
    ]

    latencies = {
        "get_initial_lat_sum": ("get_initial_lat_sum", "derive"),
        "get_initial_lat_avgcount": ("get_initial_lat_avgcount", "derive"),
        "put_initial_lat_sum": ("put_initial_lat_sum", "derive"),
        "put_initial_lat_avgcount": ("put_initial_lat_avgcount", "derive")
    }

    all_metrics = merge_dicts(simple_metrics, latencies)

    def __init__(self, cluster_name, admin_socket, **kwargs):
        BaseCollector.__init__(self, cluster_name, admin_socket, **kwargs)
        # the host name selects this gateway's section of the response
        self.host_name = get_hostname()

    def _get_rgw_data(self):
        """
        Query the admin socket and return this gateway's section.

        :return: value stored under 'client.rgw.<host_name>', or None when
                 the response has no such key
        """

        socket_response = self._admin_socket()
        return socket_response.get('client.rgw.{}'.format(self.host_name))

    def _filter(self, stats):
        """
        Reduce the raw counters to the ones this collector reports.

        :param stats: (dict) raw counters, as returned by _get_rgw_data
        :return: (dict) the simple metrics plus one
                 '<counter>_<attribute>' entry per latency attribute
        """

        selected = {}

        # the simple metrics are copied across unchanged
        for name in RGW.simple_metrics:
            selected[name] = stats[name]

        # latency counters are nested dicts - flatten them
        for prefix in RGW.int_latencies:
            for suffix, value in stats[prefix].items():
                selected["{}_{}".format(prefix, suffix)] = value

        return selected

    def get_stats(self):
        """
        Collect and filter the RGW statistics, logging the time taken.

        :return: (dict) {"rgw": <filtered counters>}
        """

        began = time.time()

        stats = self._filter(self._get_rgw_data())

        took = time.time() - began

        self.logger.info("RGW get_stats : {:.3f}s".format(took))

        return {"rgw": stats}
def fread(file_name=None):
    """
    Read a file in one go

    :param file_name: (str) path of the file to read
    :return: (str) the complete contents of the file
    """
    with open(file_name) as f:
        return f.read()


def port_open(port, host='localhost'):
    """
    Check a given port is accessible

    The 1 second timeout applies to the probe socket only; the process-wide
    default socket timeout is left untouched, so sockets created later
    (e.g. by the HTTP calls to Grafana) are not affected by this check.

    :param port: (int) port number to check
    :param host: (str) hostname to check, default is localhost
    :return: (bool) true if the port is accessible
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(1)
    try:
        # connect_ex reports a failed connect through its return value
        # (0 means success) instead of raising
        return sock.connect_ex((host, port)) == 0
    except socket.error:
        # still raised for problems such as an unresolvable host name
        return False
    finally:
        # release the descriptor whether or not the connect worked
        sock.close()


def get_config(file_name):
    """
    read a given file, and attempt to load as yaml

    :param file_name: (str) path to the yaml configuration file
    :return: (Config) config object instance, or None when the file is
             missing, is not valid yaml, or does not hold a yaml mapping
    """
    if not os.path.exists(file_name):
        return None

    config_data = fread(file_name)
    try:
        # safe_load only builds plain yaml types, so a config file can not
        # be used to instantiate arbitrary python objects
        yaml_config = yaml.safe_load(config_data)
    except yaml.YAMLError:
        return None

    # an empty file loads as None and a bare list/scalar has no .get();
    # treat both as an invalid config rather than failing further down
    if not isinstance(yaml_config, dict):
        return None

    cfg = Config()
    cfg.grafana_host = yaml_config.get('_grafana_host', 'localhost')
    cfg.dashboards = yaml_config.get('_dashboards', [])
    cfg.auth = yaml_config.get('_credentials', {"user": 'admin',
                                                "password": "admin"})
    cfg.grafana_credentials = (cfg.auth.get('user'),
                               cfg.auth.get('password'))
    cfg.grafana_port = yaml_config.get('_grafana_port', 3000)
    cfg.home_dashboard = yaml_config.get('_home_dashboard',
                                         'ceph-at-a-glance')
    cfg.alert_dashboard = yaml_config.get('_alert_dashboard',
                                          'alert-status')
    cfg.domain = yaml_config.get('domain', '')
    # keep the raw mapping; the non '_' keys are the template variables
    cfg.yaml = yaml_config
    return cfg
def load_dashboard(dashboard_dir, dashboard_name):
    """
    Load a sample dashboard from disk and prepare it for upload to Grafana

    :param dashboard_dir: (str) directory holding the sample dashboards
    :param dashboard_name: (str) dashboard name; '<name>.json' is loaded
    :return: (dict) dashboard definition ready to post, or an empty dict
             when no sample file exists for the dashboard
    :raises DashBoardException: the sample file does not contain valid json
    """

    sample_dashboard = os.path.join(dashboard_dir,
                                    "{}.json".format(dashboard_name))

    if not os.path.exists(sample_dashboard):
        logger.warning("- sample not available for {}, "
                       "skipping".format(dashboard_name))
        return {}

    # load it in
    dashboard_data = fread(sample_dashboard)

    # if domain has not been given, we need to remove it from the queries
    if not config.domain:
        dashboard_data = dashboard_data.replace('$domain.', '')

    try:
        dashjson = json.loads(dashboard_data)
    except ValueError:
        # json decode errors are ValueError (or a subclass of it); anything
        # else, e.g. KeyboardInterrupt, is deliberately left to propagate
        raise DashBoardException("Invalid json in {} "
                                 "dashboard".format(dashboard_name))

    logger.debug("- {} sample loaded from {}".format(dashboard_name,
                                                     dashboard_dir))

    # 'meta' is dropped before upload; a sample without a 'meta' section is
    # acceptable, so its absence is not treated as an error
    dashjson.pop('meta', None)
    dashjson['overwrite'] = True

    # 'id' must be null for this to be a create, if it is anything
    # else grafana will attempt an update, which will fail
    # with a 404
    dashjson['dashboard']['id'] = None
    return dashjson
def put_dashboard(dashjson):
    """
    Upload a dashboard definition to Grafana with an http post

    :param dashjson: (dict) dashboard definition to upload
    :return: (int) http status code of the post
    """
    payload = json.dumps(dashjson)
    url = "http://{}:{}/api/dashboards/db".format(config.grafana_host,
                                                  config.grafana_port)

    return post(url,
                headers=HEADERS,
                auth=config.grafana_credentials,
                data=payload).status_code


def star_dashboard(dashboard_id):
    """
    Star a dashboard for the user named in the configured credentials

    :param dashboard_id: id of the dashboard within Grafana
    :return: (int) http status code of the post
    """
    url = 'http://{}:{}/api/user/stars/dashboard/{}'.format(
        config.grafana_host,
        config.grafana_port,
        dashboard_id)

    status = post(url,
                  headers=HEADERS,
                  auth=config.grafana_credentials).status_code

    if status != 200:
        logger.warning("- starring dashboard with id {} "
                       "failed : {}".format(dashboard_id,
                                            status))
    else:
        logger.debug("- dashboard starred successfully")

    return status
def get_notification_id(channel_name):
    """
    Check whether the given notification channel has been defined to Grafana

    :param channel_name: (str) notification channel name
    :return: (int) id of the channel, or 0 if it doesn't exist
    :raises DashBoardException: Grafana did not answer the query with a 200
    """

    resp = get("http://{}:{}/api/"
               "alert-notifications".format(config.grafana_host,
                                            config.grafana_port),
               auth=config.grafana_credentials)

    if resp.status_code != 200:
        raise DashBoardException("Unable to get notification channels from"
                                 " Grafana")

    notifications = resp.json()     # list of dicts returned by Grafana

    # convert the list into a dict for lookup purposes
    channels = {channel.get('name'): channel.get('id')
                for channel in notifications}

    # 0 tells the caller the channel has not been defined yet
    return channels.get(channel_name, 0)
Unable " + "to continue") + return 16 + + dashboards_updated = 0 + logger.debug("Templates to update: {}".format(vars_to_update)) + + for dashname in config.dashboards: + logger.info("\nProcessing dashboard {}".format(dashname)) + + http_rc, dashjson = get_dashboard(dashname) + if dashname == config.alert_dashboard and http_rc == 200: + logger.info("- existing alert dashboard found, update bypassed") + continue + + if opts.mode == 'update': + + if http_rc == 200: + # the dashboard is already loaded, so we'll use the existing + # definition + logger.debug("- existing dashboard will be updated") + else: + # get of dashboard failed, so just load it + dashjson = load_dashboard(opts.dashboard_dir, dashname) + + if dashjson: + logger.info("- dashboard loaded from sample") + else: + logger.warning("- sample not available, skipping") + rc = max(rc, 4) + continue + + logger.info("- dashboard retrieved") + + elif opts.mode == 'refresh': + + dashjson = load_dashboard(opts.dashboard_dir, dashname) + + if not dashjson: + logger.warning("- sample not available, skipping") + rc = max(rc, 4) + continue + + if dashname == config.alert_dashboard: + # if processing is here, this is 1st run so the alert_dashboard + # is new to grafana + channel_id = get_notification_id("cephmetrics") + if channel_id: + logger.info("- notification channel already in place") + else: + http_rc, resp_json = define_notification("cephmetrics") + if http_rc == 200: + channel_id = resp_json['id'] + logger.info("- notification channel added :" + "{}".format(channel_id)) + else: + raise DashBoardException("Problem adding notification " + "channel ({})".format(http_rc)) + + dash_str = json.dumps(dashjson) + dash_str = dash_str.replace('"notifications": []', + '"notifications": [{{ "id":' + ' {0} }}]'.format(channel_id)) + if config.domain: + logger.debug("- queries updated, replacing $domain with " + "'{}'".format(config.domain)) + dash_str = dash_str.replace('.$domain', + ".{}".format(config.domain)) + else: + 
logger.debug("- queries updated, replacing $domain with NULL") + dash_str = dash_str.replace('.$domain', + '') + + dashjson = json.loads(dash_str) + + else: + # Normal dashboard processing + templating = dashjson['dashboard'].get('templating') + if templating: + dashjson = update_dashboard(dashjson, vars_to_update) + else: + logger.info('- templating not defined in {}, ' + 'skipping'.format(dashname)) + rc = max(rc, 4) + + http_rc = put_dashboard(dashjson) + + if http_rc == 200: + logger.info("- dashboard update successful") + dashboards_updated += 1 + + if dashname == config.home_dashboard: + # ensure the home dashboard is defined + http_rc = set_home_dashboard(dashname) + + if http_rc != 200: + logger.warning("- Unable to set the home dashboard") + rc = max(rc, 12) + + else: + logger.error("- dashboard {} update failed ({})".format(dashname, + http_rc)) + rc = max(rc, 8) + + return rc + + +if __name__ == '__main__': + + opts = get_options() + + config = get_config(opts.config_file) + + if config: + + logger = setup_logging() + + rc = main() + + sys.exit(rc) + + else: + + print("Invalid config file detected, unable to start") + sys.exit(16) diff --git a/dashboard.yml b/dashboard.yml new file mode 100644 index 0000000..5d349e0 --- /dev/null +++ b/dashboard.yml @@ -0,0 +1,41 @@ +--- +########################################################## +# Change these settings to reflect your ceph environment # +########################################################## +osd_servers: + - obj-osd-1 + - obj-osd-2 + - obj-osd-3 + +rgw_servers: + - obj-rgw-1 + +domain: storage.lab + +########################################################################### +# This section defines the internal variables (denoted by the '_' prefix) # +# that govern how dashUpdater.py runs. 
Normally you'd leave these alone # +########################################################################### +_dashboards: + - alert-status + - ceph-at-a-glance + - ceph-backend-storage + - ceph-cluster + - ceph-osd-information + - ceph-pools + - ceph-rgw-workload + - disk-busy-by-server + - iops-by-server + - latency-by-server + - network-usage-by-node + - osd-node-detail + +_home_dashboard: ceph-at-a-glance +_alert_dashboard: alert-status + +_credentials: + user: admin@localhost + password: admin + +_grafana_port: 3000 + diff --git a/dashboards/archive/Ceph_dashboard-2017-05-19.json b/dashboards/archive/Ceph_dashboard-2017-05-19.json new file mode 100644 index 0000000..f8099f0 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-19.json @@ -0,0 +1,788 @@ +{ + "__inputs": [ + { + "name": "DS_INFLUX", + "label": "influx", + "description": "", + "type": "datasource", + "pluginId": "influxdb", + "pluginName": "InfluxDB" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "influxdb", + "name": "InfluxDB", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 157, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + 
"mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_mon", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + 
"dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Recovery Workload", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { 
+ "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_INFLUX}", + "format": "bytes", + "gauge": { + "maxValue": 50465865728, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes_used", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "35949672960,42949672960", + "title": "Capacity Used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + 
"nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "reads", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "writes", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.write_op_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client IOPS for all Pools", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 
6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Reads", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_bytes_sec", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Writes", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_bytes_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.write_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Throughput - All Pools", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Pool Overview", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": 
false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard", + "version": 2 +} \ No newline at end of file diff --git 
a/dashboards/archive/Ceph_dashboard-2017-05-24.json b/dashboards/archive/Ceph_dashboard-2017-05-24.json new file mode 100644 index 0000000..7ea21a7 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-24.json @@ -0,0 +1,2413 @@ +{ + "__inputs": [ + { + "name": "DS_INFLUX", + "label": "influx", + "description": "", + "type": "datasource", + "pluginId": "influxdb", + "pluginName": "InfluxDB" + }, + { + "name": "VAR_MONITOR", + "type": "constant", + "label": "monitor", + "value": "obj-mon-1.storage.lab", + "description": "" + }, + { + "name": "VAR_CLUSTER_NAME", + "type": "constant", + "label": "cluster_name", + "value": "ceph", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "influxdb", + "name": "InfluxDB", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "vonage-status-panel", + "name": "Status Panel", + "version": "1.0.4" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 226, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to 
text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_mon", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "clusterName": "OSDs", + "displayName": "OSDs", + "flipCard": false, + "flipTime": 5, + "id": 20, + "isGrayOnNoData": true, + "links": [], + "minSpan": 1, + "namePrefix": "", + "span": 1, + "targets": [ + { + "aggregation": "Last", + "alias": "Total", + "displayType": "Annotation", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "valueDisplayRegex": "/.*/", + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "Up", + "display": true, + "displayType": "Regular", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_up", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + 
"tags": [], + "valueDisplayRegex": "/.*/", + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "In", + "displayType": "Regular", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_up", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "valueDisplayRegex": "/.*/", + "valueHandler": "Text Only" + } + ], + "title": "", + "type": "vonage-status-panel" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(193, 106, 31)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } 
+ ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Recovery Workload", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": false, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.write_op_per_sec\" WHERE 
$timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "decimals": null, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": false, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.write_op_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Client Bandwidth", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 16, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": "value", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [ + "1", + "50" + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "PG's peering", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg_peering", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg_peering\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Object Degraded", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_degraded", + "policy": "default", + "query": "SELECT \"value\" FROM 
\"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_degraded\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Objects Unfound", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_unfound", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_unfound\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Objects Recovering", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.recovering_objects_per_sec", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "title": "Health Indicators", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_INFLUX}", + "format": "bytes", + "gauge": { + "maxValue": 50465865728, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": 
[ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes_used", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "35949672960,42949672960", + "title": "Capacity Used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 256, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 7, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.osd_bytes_used", + "policy": "default", + "refId": "B", + 
"resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + 
"valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 18, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "OSDs", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "OSD's Active (in)", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_in", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "title": "OSD State", + "transform": "timeseries_aggregations", + "type": "table" + 
} + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Capacity", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.pools.$pool_name.write_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload IOPS (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + 
"yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.pools.$pool_name.write_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload Throughput (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + 
"show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.pools.$pool_name.recovering_bytes_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Overhead (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Workload by Pool", + "titleSize": "h6" + }, + { + "collapse": false, + 
"height": 223, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.put_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + 
"name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + 
"value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests/sec", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_INFLUX}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.qlen", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { 
+ "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": "", + "title": "Queue", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host (S3/Swift)", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_INFLUX}", + "filterNull": false, + "fontSize": "100%", + "id": 10, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Degraded", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_degraded", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + 
[ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Misplaced", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_misplaced", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "UnFound", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_object_unfound", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_INFLUX}", + "filterNull": false, + "fontSize": "100%", + "id": 13, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + 
"type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Active", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg_active", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Active/Clean", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg_active_clean", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Peering", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.cluster.num_pg_peering", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + 
"repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RADOS", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_INFLUX}", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + {} + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Panel Title", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Breakdown", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + }, + "hide": 2, + "label": null, + "name": "monitor", + "options": [ + { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + } + ], + "query": "${VAR_MONITOR}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + }, + "hide": 2, + "label": null, + "name": "cluster_name", + "options": [ + { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + } + ], + "query": "${VAR_CLUSTER_NAME}", + "type": "constant" + }, + { + "allValue": null, + 
"current": {}, + "datasource": "${DS_INFLUX}", + "hide": 0, + "includeAll": false, + "label": "Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "show series ", + "refresh": 1, + "regex": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.pools\\.(\\w+)/", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_INFLUX}", + "hide": 0, + "includeAll": false, + "label": "RGW Host", + "multi": false, + "name": "rgw_name", + "options": [], + "query": "show series", + "refresh": 1, + "regex": "/collectd\\.(.*)\\.cephmetrics.*\\.rgw/", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard", + "version": 57 +} \ No newline at end of file diff --git a/dashboards/archive/Ceph_dashboard-2017-05-25.json b/dashboards/archive/Ceph_dashboard-2017-05-25.json new file mode 100644 index 0000000..915b0e1 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-25.json @@ -0,0 +1,2689 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_MONITOR", + "type": "constant", + "label": "monitor", + "value": "obj-mon-1.storage.lab", + "description": "" + }, + { + "name": "VAR_CLUSTER_NAME", + "type": "constant", + "label": "cluster_name", + "value": "ceph", + "description": "" + }, + { + "name": "VAR_RGW_NAME", + "type": "constant", + "label": "RGW Host", + "value": "obj-rgw-1", + "description": "" + }, 
+ { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 243, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.health" + } + ], + "thresholds": "1", + "title": "Health", + "type": "singlestat", + "valueFontSize": "100%", + 
"valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mon", + "textEditor": true + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" 
+ ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd" + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + 
"lineColor": "rgb(193, 106, 31)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.recovering_bytes_per_sec", + "textEditor": true + } + ], + "thresholds": "", + "title": "Recovery Workload", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A, #B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec, collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec).select metric" + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 
1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.pools._all_.write_bytes_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A,#B).select metric", + "targetFull": 
"sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec,collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_bytes_sec).select metric" + } + ], + "thresholds": "", + "title": "Client Bandwidth", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 50465865728, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used" + } + ], + "thresholds": "35949672960,42949672960", + "title": "Capacity Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + 
], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 256, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 7, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 'Raw')" + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 'Used')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pool" + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 18, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", 
+ "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd, 'OSDs')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_in, 'OSDs In')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_up, 'OSDs Up')" + } + ], + 
"title": "OSD State", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Capacity", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec, 'Reads')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": 
"alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec, 'Writes')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload IOPS (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec, 'Read')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM 
/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec, 'Write')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload Throughput (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec": "#C15C17" + }, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + 
"params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Overhead (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Workload by Pool", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 302, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": 
"field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum" + }, + { + "refId": "C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum, collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount).select metric", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": 
"influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum" + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum,collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount).select metric", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "span": 5, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get, 'GET')" + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put, 'PUT')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests/sec", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.qlen", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.qlen" + } + ], + "thresholds": "", + "title": "Queue", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host (S3/Swift)", + "titleSize": "h6" + }, + { + 
"collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 10, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object, 'Objects')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_degraded, 'Objects degraded')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" 
+ ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_misplaced, 'Objects misplaced')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_unfound, 'Objects unfound')" + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 13, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + 
"params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg, 'PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active, 'Active PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean, 'Active+clean PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + 
"value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_peering, 'PGs peering')" + } + ], + "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RADOS", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + {} + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Panel Title", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Breakdown", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "height": "245px", + "id": 16, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 12, + 
"styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": "value", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [ + "1", + "50" + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "PG's peering", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_peering", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_peering\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_degraded, 'Objects degraded')", + "textEditor": false + }, + { + "alias": "Object Degraded", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object_degraded", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object_degraded\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_peering, 'PGs peering')" + }, + { + "alias": "Objects Unfound", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object_unfound", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object_unfound\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": 
"time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_unfound, 'Objects unfound')" + }, + { + "alias": "Objects Recovering", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_objects_per_sec", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.recovering_objects_per_sec, 'Objects recovering')" + } + ], + "title": "Health Indicators", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + }, + "hide": 2, + "label": null, + "name": "monitor", + "options": [ + { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + } + ], + "query": "${VAR_MONITOR}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + }, + "hide": 2, + "label": null, + "name": "cluster_name", + "options": [ + { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + } + ], + "query": "${VAR_CLUSTER_NAME}", + "type": "constant" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 2, + "includeAll": false, + "label": "Pool Name", + "multi": false, + "name": "pool_name_old", + "options": [], + "query": "show series ", + "refresh": 1, + "regex": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools\\.(\\w+)/", + "sort": 0, + "tagValuesQuery": 
"", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 2, + "includeAll": false, + "label": "RGW Host", + "multi": false, + "name": "rgw_name_old", + "options": [], + "query": "show series", + "refresh": 1, + "regex": "/collectd\\.(.*)\\.cephmetrics.*\\.rgw/", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "value": "${VAR_RGW_NAME}", + "text": "${VAR_RGW_NAME}" + }, + "hide": 0, + "label": "RGW Host", + "name": "rgw_name", + "options": [ + { + "value": "${VAR_RGW_NAME}", + "text": "${VAR_RGW_NAME}" + } + ], + "query": "${VAR_RGW_NAME}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard (Graphite)", + "version": 1 +} \ No newline at end of file diff --git a/dashboards/archive/Ceph_dashboard-2017-05-26.json b/dashboards/archive/Ceph_dashboard-2017-05-26.json new file mode 100644 index 
0000000..e6cb476 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-26.json @@ -0,0 +1,2792 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_MONITOR", + "type": "constant", + "label": "monitor", + "value": "obj-mon-1.storage.lab", + "description": "" + }, + { + "name": "VAR_CLUSTER_NAME", + "type": "constant", + "label": "cluster_name", + "value": "ceph", + "description": "" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 243, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, 
+ "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.health" + } + ], + "thresholds": "1", + "title": "Health", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "Warning", + "value": "4" + }, + { + "op": "=", + "text": "Error", + "value": "8" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon\" WHERE $timeFilter", + 
"rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mon", + "textEditor": true + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd" + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 
100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 33, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "sumSeries(offset(scale(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0),1))", + "textEditor": true + } + ], + "thresholds": "", + "title": "RGW Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 34, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 
193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mds_in" + } + ], + "thresholds": "", + "title": "MDS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(193, 106, 31)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.recovering_bytes_per_sec", + "textEditor": true + } + ], + "thresholds": "", + "title": "Recovery", + 
"type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) 
fill(null)", + "rawQuery": false, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A, #B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec, collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec).select metric" + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM 
\"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.pools._all_.write_bytes_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A,#B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec,collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_bytes_sec).select metric" + } + ], + "thresholds": "", + "title": "Client Load", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 50465865728, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to 
text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used" + } + ], + "thresholds": "35949672960,42949672960", + "title": "Capacity Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 256, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 7, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + 
"select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 'Raw')" + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 'Used')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 
118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pool" + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 18, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd, 'OSDs')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_in, 'OSDs In')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_up, 'OSDs Up')" + } + ], + "title": "OSD State", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Capacity", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": 
"/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec, 'Reads')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec, 'Writes')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload IOPS (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": 
[], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec, 'Read')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec, 'Write')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload Throughput (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
true + } + ] + }, + { + "aliasColors": { + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec": "#C15C17" + }, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Overhead (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Workload Detail by Pool", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 302, + 
"panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum" + }, + { + "refId": 
"C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum, collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount).select metric", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum" + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": 
"divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum,collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount).select metric", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": 
"alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get, 'GET')" + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put, 'PUT')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests/sec", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": 
null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.qlen", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.qlen" + } + ], + "thresholds": "", + "title": "Queue", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 32, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(scale(averageSeries(collectd.$rgw_name.$domain.cpu.*.cpu.idle),0.01), 1)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, 
+ "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 31, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.rx), 'rx')" + }, + { + "refId": "B", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.tx), 'tx')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host (S3/Swift)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 10, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + 
"span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object, 'Objects')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_degraded, 'Objects degraded')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": 
"mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_misplaced, 'Objects misplaced')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_unfound, 'Objects unfound')" + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 13, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + 
"target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg, 'PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active, 'Active PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean, 'Active+clean PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_peering, 'PGs peering')" + } + ], + "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": 
null, + "repeatRowId": null, + "showTitle": true, + "title": "RADOS", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + {} + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Panel Title", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Breakdown", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + }, + "hide": 2, + "label": null, + "name": "monitor", + "options": [ + { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + } + ], + "query": "${VAR_MONITOR}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + }, + "hide": 2, + "label": null, + "name": "cluster_name", + "options": [ + { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + } + ], + "query": "${VAR_CLUSTER_NAME}", + "type": "constant" + }, + { + "allValue": null, + "current": {}, + 
"datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "Workload Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "RGW Host", + "multi": true, + "name": "rgw_name", + "options": [], + "query": "collectd.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard (Graphite)", + "version": 17 +} \ No newline at end of file diff --git a/dashboards/archive/Ceph_dashboard-2017-05-29.json b/dashboards/archive/Ceph_dashboard-2017-05-29.json new file mode 100644 index 0000000..9765ed9 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-29.json @@ -0,0 +1,3669 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_MONITOR", + "type": "constant", + "label": "monitor", + "value": "obj-mon-1.storage.lab", + "description": "" + }, + { + "name": "VAR_CLUSTER_NAME", + "type": 
"constant", + "label": "cluster_name", + "value": "ceph", + "description": "" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 243, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.health" + } + ], + "thresholds": 
"1", + "title": "Health", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "Warning", + "value": "4" + }, + { + "op": "=", + "text": "Error", + "value": "8" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mon", + "textEditor": true + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + 
"valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd" + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 35, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + 
"prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "sumSeries(offset(scale(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.vdb.osd_id,0),1))", + "textEditor": true + } + ], + "thresholds": "", + "title": "OSD Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 33, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "sumSeries(offset(scale(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0),1))", + "textEditor": true + } + ], + "thresholds": "", + "title": "RGW Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + 
"colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 34, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mds_in" + } + ], + "thresholds": "", + "title": "MDS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + 
"from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(193, 106, 31)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.recovering_bytes_per_sec", + "textEditor": true + } + ], + "thresholds": "", + "title": "Recovery", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + 
"show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A, #B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec, collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec).select metric" + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + 
"thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.pools._all_.write_bytes_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A,#B).select metric", + "targetFull": 
"sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec,collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_bytes_sec).select metric" + } + ], + "thresholds": "", + "title": "Client Load", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 37, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 50, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util),$percentile)", + "textEditor": true + } + ], + "thresholds": "70,90", + "title": "Disk Busy", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, 
+ "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 36, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 50, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),$percentile)", + "textEditor": true + } + ], + "thresholds": "20,60", + "title": "Latency(ms)", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 38, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": 
"rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_avail" + } + ], + "thresholds": "", + "title": "Free Space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 256, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 7, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 'Raw')" + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 'Used')" + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pool" + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": 
"null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 18, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd, 'OSDs')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_in, 'OSDs In')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + 
"query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_up, 'OSDs Up')" + } + ], + "title": "OSD State", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Capacity", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec, 'Reads')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": 
"default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec, 'Writes')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload IOPS (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": 
"alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec, 'Read')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec, 'Write')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload Throughput (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec": "#C15C17" + }, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": 
"/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Overhead (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Workload Detail by Pool", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 302, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum" + }, + { + "refId": "C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum, collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount).select metric", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE 
$timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum" + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum,collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount).select metric", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 
25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get, 'GET')" + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put, 'PUT')" + } + ], + "thresholds": [], + "timeFrom": 
null, + "timeShift": null, + "title": "Requests/sec", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.qlen", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.qlen" 
+ } + ], + "thresholds": "", + "title": "Queue", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 32, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(scale(averageSeries(collectd.$rgw_name.$domain.cpu.percent.idle),0.01), 1)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 31, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.rx), 'rx')" + }, + { + 
"refId": "B", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.tx), 'tx')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host (S3/Swift)", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "300", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_mbps), \"Read Throughput\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_mbps), \"Write Throughput\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend Disk Load (MB/s) - all OSD's", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 40, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_await),$percentile), \"Read Latency\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_await),$percentile), \"Write Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend Disk Latency (ms) - all OSD's - at $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 43, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util),$percentile), \"disk busy %\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Overall Disk Busy at $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),$percentile),\"IOPS/spindle\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS per Disk @ $percentile%ile - all OSDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + 
"min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"IOPS\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend IOPS - all OSD's", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(scale(collectd.*.$domain.cpu.percent.idle,0.01),1)", + "textEditor": true + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeShift": null, + "title": "CPU Busy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend OSD Load Summary", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 39, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.osd_id,1,-2)", + "textEditor": true + } + ], + "title": "Host/Disk to OSD ID Mapping", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 41, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + 
"rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await,1,-3)", + "textEditor": true + } + ], + "title": "Disk Latency Breakdown (ms)", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 42, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.mbps,1,-3)", + "textEditor": true + } + ], + "title": "Disk Bandwidth (MB/s)", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 45, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + 
"target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util,1,-3)", + "textEditor": true + } + ], + "title": "Disk %Util", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 48, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops,1,-3)", + "textEditor": true + } + ], + "title": "IOPS/Disk", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Detail", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 10, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "dsType": "influxdb", + 
"groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object, 'Objects')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_degraded, 'Objects degraded')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_misplaced, 'Objects misplaced')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_unfound, 'Objects unfound')" + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 13, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg, 'PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + 
"policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active, 'Active PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean, 'Active+clean PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_peering, 'PGs peering')" + } + ], + "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RADOS", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + }, + "hide": 2, + "label": null, + "name": "monitor", + "options": [ + { + "value": "${VAR_MONITOR}", 
+ "text": "${VAR_MONITOR}" + } + ], + "query": "${VAR_MONITOR}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + }, + "hide": 2, + "label": null, + "name": "cluster_name", + "options": [ + { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + } + ], + "query": "${VAR_CLUSTER_NAME}", + "type": "constant" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "Workload Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "RGW Host", + "multi": true, + "name": "rgw_name", + "options": [], + "query": "collectd.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,90,95,98", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + 
"timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard (Graphite)", + "version": 37 +} \ No newline at end of file diff --git a/dashboards/archive/Ceph_dashboard-2017-05-31.json b/dashboards/archive/Ceph_dashboard-2017-05-31.json new file mode 100644 index 0000000..4fbc5e6 --- /dev/null +++ b/dashboards/archive/Ceph_dashboard-2017-05-31.json @@ -0,0 +1,3757 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_MONITOR", + "type": "constant", + "label": "monitor", + "value": "obj-mon-1.storage.lab", + "description": "" + }, + { + "name": "VAR_CLUSTER_NAME", + "type": "constant", + "label": "cluster_name", + "value": "ceph", + "description": "" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.3.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 243, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + 
"rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.health" + } + ], + "thresholds": "1", + "title": "Health", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "Warning", + "value": "4" + }, + { + "op": "=", + "text": "Error", + "value": "8" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ 
+ { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_mon\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mon", + "textEditor": true + } + ], + "thresholds": "", + "title": "Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + 
"refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd" + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 35, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(offset(scale(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.vdb.osd_id,0),1))", + "textEditor": true + } + ], + "thresholds": "", + "title": "OSD Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true 
+ }, + "id": 33, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(offset(scale(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put,0),1))", + "textEditor": true + } + ], + "thresholds": "", + "title": "RGW Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 34, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + 
"refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_mds_in" + } + ], + "thresholds": "", + "title": "MDS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(193, 106, 31)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.recovering_bytes_per_sec", + "textEditor": true + } + ], + "thresholds": "", + "title": "Recovery", + "type": "singlestat", + 
"valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)", + 
"rawQuery": false, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A, #B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_op_per_sec, collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_op_per_sec).select metric" + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM 
\"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec" + }, + { + "dsType": "influxdb", + "groupBy": [], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.pools._all_.write_bytes_sec" + }, + { + "refId": "C", + "target": "sumSeries(#A,#B).select metric", + "targetFull": "sumSeries(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.read_bytes_sec,collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools._all_.write_bytes_sec).select metric" + } + ], + "thresholds": "", + "title": "Client Load", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 37, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", 
+ "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 50, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util),$percentile)", + "textEditor": true + } + ], + "thresholds": "70,90", + "title": "Disk Busy", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_LOCAL}", + "decimals": 1, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 36, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 50, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),$percentile)", + 
"textEditor": true + } + ], + "thresholds": "20,60", + "title": "Latency(ms)", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 38, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_avail" + } + ], + "thresholds": "", + "title": "Free Space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Overview", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 256, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + 
"values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 7, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Raw Capacity", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 'Raw')" + }, + { + "alias": "Used", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.osd_bytes_used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 'Used')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + 
"thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pool" + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 18, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd, 'OSDs')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_in, 'OSDs In')" + }, + { + "alias": "OSD's Up", + "dsType": "influxdb", + "groupBy": [], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_in", + "policy": "default", + "query": "SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_osd_up\" WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_osd_up, 'OSDs Up')" + } + ], + "title": "OSD State", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster Capacity", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": 
"${DS_LOCAL}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec, 'Reads')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_op_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_op_per_sec, 'Writes')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Workload IOPS (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": 
[ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.read_bytes_sec, 'Read')" + }, + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_bytes_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.write_bytes_sec, 'Write')" + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeShift": null, + "title": "Client Workload Throughput (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.recovering_bytes_per_sec": "#C15C17" + }, + "bars": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "", + "dsType": "influxdb", + "groupBy": [], + "measurement": "/collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools.$pool_name.read_op_per_sec/", + "policy": "default", + "query": "SELECT \"value\" FROM /collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec/ WHERE $timeFilter", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "target": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.$pool_name.recovering_bytes_per_sec" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Overhead (pools: $pool_name)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": 
"time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bandwidth", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Workload Detail by Pool", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 302, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum" + }, + { + "refId": "C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum, collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount).select metric", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE 
$timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum" + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": "divideSeries(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum,collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount).select metric", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 5, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + 
"measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.get, 'GET')" + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.put, 'PUT')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests/sec", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + 
"rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_LOCAL}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.qlen", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_name.$domain.cephmetrics.derive.$cluster_name.rgw.qlen" + } + ], + "thresholds": "", + "title": "Queue", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 32, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(scale(averageSeries(collectd.$rgw_name.$domain.cpu.percent.idle),0.01), 1)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 31, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.rx), 'rx')" + }, + { + "refId": "B", + "target": "alias(sumSeries(collectd.$rgw_name.$domain.interface.*.if_octets.tx), 'tx')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + 
"logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host (S3/Swift)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "300", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_mbps), \"Read Throughput\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_mbps), \"Write Throughput\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend Disk Load (MB/s) - all OSD's", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 40, + "legend": { 
+ "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/latency-by-server", + "dashboard": "Latency by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Latency by Server", + "type": "dashboard" + } + ], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_await),$percentile), \"Read Latency\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_await),$percentile), \"Write Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend Disk Latency (ms) - all OSD's - at $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 43, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/disk-busy-by-server", + "dashboard": "Disk Busy by Server", + 
"includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Disk Busy by Server", + "type": "dashboard" + } + ], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util),$percentile), \"disk busy %\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Overall Disk Busy at $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),$percentile),\"IOPS/spindle\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS per Disk @ $percentile%ile - all OSDs", 
+ "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/iops-by-server", + "dashboard": "IOPS by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "IOPS by Server", + "type": "dashboard" + } + ], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"IOPS\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Backend IOPS - all OSD's", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"${DS_LOCAL}", + "fill": 1, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(scale(collectd.*.$domain.cpu.percent.idle,0.01),1)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend OSD Load Summary", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 39, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": 
"aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.osd_id,1,-2)", + "textEditor": true + } + ], + "title": "Host/Disk to OSD ID Mapping", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 41, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await,1,-3)", + "textEditor": true + } + ], + "title": "Disk Latency Breakdown (ms)", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 42, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.mbps,1,-3)", + "textEditor": true + } + ], + "title": "Disk Bandwidth (MB/s)", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + 
"columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 45, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util,1,-3)", + "textEditor": true + } + ], + "title": "Disk %Util", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "filterNull": false, + "fontSize": "100%", + "id": 48, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 2, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops,1,-3)", + "textEditor": true + } + ], + "title": "IOPS/Disk", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Detail", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + 
], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 10, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object, 'Objects')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_degraded, 'Objects degraded')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_misplaced, 'Objects misplaced')" + }, + { + "alias": "Objects", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_object", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_object_unfound, 'Objects unfound')" + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_LOCAL}", + "filterNull": false, + "fontSize": "100%", + "id": 13, + "links": [], + "minSpan": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "span": 3, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": 
"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg, 'PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active, 'Active PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean, 'Active+clean PGs')" + }, + { + "alias": "PG's", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": 
"mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.num_pg_peering, 'PGs peering')" + } + ], + "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RADOS", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + }, + "hide": 2, + "label": null, + "name": "monitor", + "options": [ + { + "value": "${VAR_MONITOR}", + "text": "${VAR_MONITOR}" + } + ], + "query": "${VAR_MONITOR}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + }, + "hide": 2, + "label": null, + "name": "cluster_name", + "options": [ + { + "value": "${VAR_CLUSTER_NAME}", + "text": "${VAR_CLUSTER_NAME}" + } + ], + "query": "${VAR_CLUSTER_NAME}", + "type": "constant" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "Workload Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "collectd.$monitor.cephmetrics.gauge.$cluster_name.mon.pools.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_LOCAL}", + "hide": 0, + "includeAll": false, + "label": "RGW Host", + "multi": false, + "name": "rgw_name", + "options": [], + "query": "collectd.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": 
"${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,90,95,98", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Dashboard (Graphite)", + "version": 43 +} \ No newline at end of file diff --git a/dashboards/archive/Disk Busy by Server-2017-05-31.json b/dashboards/archive/Disk Busy by Server-2017-05-31.json new file mode 100644 index 0000000..fdc6468 --- /dev/null +++ b/dashboards/archive/Disk Busy by Server-2017-05-31.json @@ -0,0 +1,404 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + }, + { + "name": "VAR_CEPH_CLUSTER", + "type": "constant", + "label": "ceph_cluster", + "value": "ceph", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.3.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + 
}, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util,1,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Servers Disk Utilization Peak", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All Servers by Highest Disk %Util", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "OSD Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util),$percentile),\"all disk busy @$percentile%ile\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilisation @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Node's $percentile%ile Utilisation", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + 
], + "query": "${VAR_DOMAIN}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + }, + "hide": 2, + "label": null, + "name": "ceph_cluster", + "options": [ + { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + } + ], + "query": "${VAR_CEPH_CLUSTER}", + "type": "constant" + }, + { + "allValue": null, + "current": { + "text": "95", + "value": "95" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "85", + "value": "85" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,85,90,95,98", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Disk Busy by Server", + "version": 25 +} \ No newline at end of file diff --git a/dashboards/archive/IOPS by Server-2017-05-31.json b/dashboards/archive/IOPS by Server-2017-05-31.json 
new file mode 100644 index 0000000..02de4d6 --- /dev/null +++ b/dashboards/archive/IOPS by Server-2017-05-31.json @@ -0,0 +1,363 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + }, + { + "name": "VAR_CEPH_CLUSTER", + "type": "constant", + "label": "ceph_cluster", + "value": "ceph", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.3.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.iops,1,\"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Server IOPS", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All Servers by IOPS", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "OSD Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.iops)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Total OSD IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Node's IOPS Load", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + }, + "hide": 2, + "label": null, + "name": "ceph_cluster", + "options": [ + { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + } + ], + "query": "${VAR_CEPH_CLUSTER}", + "type": "constant" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + 
"title": "IOPS by Server", + "version": 2 +} \ No newline at end of file diff --git a/dashboards/archive/Latency by Server-2017-05-31.json b/dashboards/archive/Latency by Server-2017-05-31.json new file mode 100644 index 0000000..b5465dd --- /dev/null +++ b/dashboards/archive/Latency by Server-2017-05-31.json @@ -0,0 +1,384 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL", + "label": "Local", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + }, + { + "name": "VAR_DOMAIN", + "type": "constant", + "label": "domain", + "value": "storage.lab", + "description": "" + }, + { + "name": "VAR_CEPH_CLUSTER", + "type": "constant", + "label": "ceph_cluster", + "value": "ceph", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.3.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": 
"groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.await,1,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Servers - Highest Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All Servers by IOPS", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_LOCAL}", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "OSD Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.await)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Latency", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Node's IOPS Load", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + }, + "hide": 2, + "label": null, + "name": "domain", + "options": [ + { + "value": "${VAR_DOMAIN}", + "text": "${VAR_DOMAIN}" + } + ], + "query": "${VAR_DOMAIN}", + "type": "constant" + }, + { + "current": { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + }, + "hide": 2, + "label": null, + "name": "ceph_cluster", + "options": [ + { + "value": "${VAR_CEPH_CLUSTER}", + "text": "${VAR_CEPH_CLUSTER}" + } + ], + "query": "${VAR_CEPH_CLUSTER}", + "type": "constant" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + }, + { + "allValue": 
null, + "current": { + "text": "95", + "value": "95" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": true, + "text": "95", + "value": "95" + } + ], + "query": "95", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Latency by Server", + "version": 1 +} \ No newline at end of file diff --git a/dashboards/current/Dashboard Relationships.png b/dashboards/current/Dashboard Relationships.png new file mode 100644 index 0000000..609a3c1 Binary files /dev/null and b/dashboards/current/Dashboard Relationships.png differ diff --git a/dashboards/current/alert-status.json b/dashboards/current/alert-status.json new file mode 100644 index 0000000..cc1fc28 --- /dev/null +++ b/dashboards/current/alert-status.json @@ -0,0 +1,1159 @@ +{ + "dashboard": { + "__requires": [ + { + "id": "alertlist", + "name": "Alert List", + "type": "panel", + "version": "" + }, + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "4.4.1" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "" + }, + { + "id": "graphite", + "name": "Graphite", + "type": "datasource", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 58, + "links": [], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "id": 1, + "limit": "20", + "links": [], + "onlyAlertsOnDashboard": true, + "show": "current", + "sortOrder": 3, + "span": 12, + "stateFilter": [ + "alerting" + ], + "title": "Active Ceph Alert List", + "type": "alertlist" + } + ], + 
"repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "frequency": "30s", + "handler": 1, + "message": "Cluster Health is not OK", + "name": "Overall Ceph Health", + "noDataState": "no_data", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": { + "Ceph Health": "#890F02", + "Ceph Health (0:OK, 4:Warning,8:Error)": "#DEDAF7" + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "The chart plots the clusters health, over time. Health is depicted as a integer; 0, 4 or 8 where 0 is OK, 4 is WARN and 8 represents an ERROR state.", + "fill": 1, + "hideTimeOverride": true, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": true, + "targets": [ + { + "refId": "A", + "target": "alias(limit(sortByMaxima(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.mon.health), \"max\")),1), 'Ceph Health')", + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": "1h", + "timeShift": null, + "title": "Overall Ceph Health", + "tooltip": { + "shared": false, + "sort": 1, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "10", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "frequency": "60s", + "handler": 1, + "message": "DIsks Near full detected within the cluster. Warning threshold is 80% full.", + "name": "Disks Near Full", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "This shows how many disks are at or above 80% full. 
Performance may degrade beyond this threshold on filestore (XFS) backed OSD's.", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(countSeries(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.fs_percent_used),80)), \"disks near full\")", + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disks Near Full", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "C", + "30s", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "10s", + "handler": 1, + "message": "OSD Down event", + "name": "OSDs Down", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "Count of OSDs currently in a DOWN state", 
+ "fill": 2, + "hideTimeOverride": true, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": true, + "targets": [ + { + "hide": true, + "refId": "A", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd), \"max\")),\"total\")", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd_up), \"max\")),\"up\")", + "textEditor": true + }, + { + "hide": false, + "refId": "C", + "target": "alias(diffSeries(#A,#B), \"down\")", + "targetFull": "alias(diffSeries(alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd), \"max\")),\"total\"),alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd_up), \"max\")),\"up\")), \"down\")", + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": "5m", + "timeShift": null, + "title": "OSDs Down", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ 
+ { + "evaluator": { + "params": [ + 85 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "C", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "message": "Cluster Capacity Limit Warning", + "name": "Cluster Capacity", + "noDataState": "keep_state", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "This trigger raises a notification if the raw used crosses the 85% capacity threshold of the ceph cluster", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "hide": true, + "refId": "A", + "target": "alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes,1, \"maxSeries\")), \"Raw Capacity\")", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_used,1, \"maxSeries\")), \"Used Raw\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(asPercent(#B, #A), \"Raw Capacity Used %\")", + "targetFull": "alias(asPercent(alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_used,1, \"maxSeries\")), \"Used Raw\"), alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes,1, \"maxSeries\")), \"Raw Capacity\")), \"Raw Capacity Used %\")", + "textEditor": true + } + ], + "thresholds": [ + { + 
"colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 85 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "4h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "message": "PG peering is a taking a long time to finish", + "name": "PG Peering Delay", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "This chart shows the pg peering state over the last 6 hours. 
If peering has been ongoing for more than 4hrs, this alert will trigger to prompt further investigation on the ceph cluster", + "fill": 1, + "hideTimeOverride": true, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(maxSeries(consolidateBy(collectd.*.$domain.cephmetrics.gauge.*.mon.num_pg_peering, \"maxSeries\")), \"# pg's in peering state\")", + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": "6h", + "timeShift": null, + "title": "PG Peering State", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + "total" + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "message": "PG's stuck inactive", + "name": "PG's Stuck", + "noDataState": "no_data", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": "Local", + "description": "This chart shows whether there are pg's in a stuck state, that need manual intervention to resolve.", + "fill": 2, + "hideTimeOverride": true, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(maxSeries(consolidateBy(collectd.*.$domain.cephmetrics.gauge.*.mon.num_pgs_stuck, \"maxSeries\")), \"# pg's stuck inactive\")", + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": "6h", + "timeShift": null, + "title": "PG's Stuck", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + "total" + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "min" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "message": "OSD Host Loss Free Space Check Failed", + "name": "OSD Host Loss Check", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] + }, + 
"aliasColors": { + "Largest OSD Host": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "This graph checks the cluster @ 90% full is enough to support the loss of the largest OSD host", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "hide": true, + "refId": "A", + "target": "alias(scale(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_avail,1, \"maxSeries\")),0.9), \"Raw Freespace\")", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "alias(maxSeries(groupByNode(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.stat_bytes),1,\"sumSeries\")), \"Largest OSD Host\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(diffSeries(#A,#B),\"freespace after Node loss\")", + "targetFull": "alias(diffSeries(alias(scale(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_avail,1, \"maxSeries\")),0.9), \"Raw Freespace\"),alias(maxSeries(groupByNode(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.stat_bytes),1,\"sumSeries\")), \"Largest OSD Host\")),\"freespace after Node loss\")", + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "lt", + "value": 0 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "OSD Host Loss Check", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + 
"yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 1000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "30s", + "handler": 1, + "message": "OSD Response time is > 1s", + "name": "Slow OSD responses alert", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": { + "Largest OSD Host": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "Graph checking for OSD Latencies that are above 1s.", + "fill": 1, + "hideTimeOverride": true, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(currentAbove(collectd.*.$domain.cephmetrics.gauge.*.osd.*.perf.await,1000),1,-3)", + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1000 + } + ], + "timeFrom": "1h", + "timeShift": null, + "title": "Slow OSD responses", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "ms", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Health Checks", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Alert Status", + "version": 0 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-01T05:18:58Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "alert-status", + "type": "db", + "updated": "2017-08-01T05:18:58Z", + "updatedBy": "admin@localhost", + "version": 0 + } +} \ No newline at end of file diff --git a/dashboards/current/ceph-at-a-glance.json b/dashboards/current/ceph-at-a-glance.json new file mode 100644 index 0000000..cba9f58 --- /dev/null +++ b/dashboards/current/ceph-at-a-glance.json @@ -0,0 +1 @@ +{"meta":{"isStarred":true,"type":"db","canSave":true,"canEdit":true,"canStar":true,"slug":"ceph-at-a-glance","expires":"0001-01-01T00:00:00Z","created":"2017-07-13T06:58:30Z","updated":"2017-07-20T20:08:12Z","updatedBy":"admin","createdBy":"admin","version":49},"dashboard":{"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":true,"id":1,"links":[{"asDropdown":true,"icon":"external 
link","includeVars":true,"keepTime":true,"tags":["overview"],"targetBlank":true,"title":"Shortcuts","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"145","panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"height":"50px","id":86,"interval":null,"links":[{"dashUri":"db/ceph-rados","dashboard":"Ceph - RADOS","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph - RADOS Status","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"10%","prefix":"","prefixFontSize":"10%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":""}],"thresholds":"","title":"","transparent":true,"type":"singlestat","valueFontSize":"35%","valueMaps":[{"op":"=","text":"RADOS","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"height":"50px","id":82,"interval":null,"links":[{"dashUri":"db/ceph-frontend","dashboard":"Ceph - Frontend","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph Pool Performance","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"10%","prefix":"","prefixFontSize":"10%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":""}],"thresholds":"","title":"","transparent":true,"type":"singlestat","valueFontSize":"35%","valueMaps":[{"op":"=","text":"Pools","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"height":"50px","id":91,"interval":null,"links":[{"dashUri":"db/ceph-osd-latency","dashboard":"Ceph - OSD Latency","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph - OSD Latency","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"10%","prefix":"","prefixFontSize":"10%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":""}],"thresholds":"","title":"","transparent":true,"type":"singlestat","valueFontSize":"35%","valueMaps":[{"op":"=","text":"OSDs","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 
0.97)"],"datasource":null,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"height":"50px","id":84,"interval":null,"links":[{"dashUri":"db/ceph-rgw-workload","dashboard":"Ceph - RGW Workload","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph RADOSGW Performance","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"10%","prefix":"","prefixFontSize":"10%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":""}],"thresholds":"","title":"","transparent":true,"type":"singlestat","valueFontSize":"35%","valueMaps":[{"op":"=","text":"S3/Swift","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"height":"50px","id":83,"interval":null,"links":[{"dashUri":"db/ceph-backend-storage","dashboard":"Ceph Backend Storage","includeVars":true,"keepTime":true,"targetBlank":true,"title":"OSD Host Performance","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"10%","prefix":"","prefixFontSize":"10%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":""}],"thresholds":"","title":"","transparent":true,"type":"singlestat","valueFontSize":"35%","valueMaps":[{"op":"=","text":"OSD Hosts","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"height":"50px","id":85,"interval":null,"links":[{"dashUri":"db/network-usage-by-node","dashboard":"Network Usage by Node","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Network Usage by OSD Host","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"10%","prefix":"","prefixFontSize":"10%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":""}],"thresholds":"","title":"","transparent":true,"type":"singlestat","valueFontSize":"35%","valueMaps":[{"op":"=","text":"Network","value":"null"}],"valueName":"current"},{"content":"","id":95,"links":[],"minSpan":4,"mode":"markdown","span":4,"title":"","transparent":true,"type":"text"},{"id":94,"limit":10,"links":[{"dashUri":"db/alert-status","dashboard":"Alert Status","targetBlank":true,"title":"Alert Status","type":"dashboard"}],"minSpan":2,"onlyAlertsOnDashboard":false,"show":"current","sortOrder":3,"span":2,"stateFilter":["alerting"],"title":"Active Alerts","type":"alertlist"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard 
Row","titleSize":"h6"},{"collapse":false,"height":"225","panels":[{"cacheTimeout":null,"colorBackground":true,"colorValue":false,"colors":["rgba(1, 167, 1, 1)","rgba(255,165,0, 1)","rgba(255, 0, 0, 1)"],"datasource":"Local","description":"Shows the overall health of the ceph cluster","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"hideTimeOverride":true,"id":28,"interval":null,"links":[{"dashUri":"db/ceph-rados","dashboard":"Ceph - RADOS","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph - RADOS Status","type":"dashboard"}],"mappingType":2,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"0","text":"OK","to":"0"},{"from":"1","text":"WARN","to":"4"},{"from":"5","text":"ERROR","to":"99"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":"consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.health),\"max\")","textEditor":true}],"thresholds":"1,5","timeFrom":"1m","timeShift":null,"title":"Health","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"OK","value":"0"},{"op":"=","text":"WARN","value":"4"},{"op":"=","text":"ERROR","value":"8"}],"valueName":"current"},{"clusterName":"MONs","displayName":"MONs","flipCard":false,"flipTime":5,"hideTimeOverride":true,"id":75,"isGrayOnNoData":true,"links":[],"minSpan":1,"namePrefix":"","span":1,"targets":[{"aggregation":"Last","alias":"total","displayType":"Regular","refId":"D","target":"alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon), \"max\")),\"total\")","textEditor":true,"valueHandler":"Text 
Only"},{"aggregation":"Last","alias":"quorum","display":true,"displayType":"Regular","refId":"E","target":"alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon_quorum), \"max\")),\"quorum\")","textEditor":true,"valueHandler":"Threshold","warn":"1"},{"aggregation":"Last","alias":"down","display":true,"displayType":"Regular","refId":"A","target":"alias(diffSeries(#D,#E), \"down\")","targetFull":"alias(diffSeries(alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon), \"max\")),\"total\"),alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_mon_quorum), \"max\")),\"quorum\")), \"down\")","textEditor":true,"valueHandler":"Threshold","warn":"1"}],"timeFrom":"1m","timeShift":null,"title":"","type":"vonage-status-panel"},{"clusterName":"OSDs","displayName":"OSDs","flipCard":false,"flipTime":5,"hideTimeOverride":true,"id":65,"links":[],"minSpan":1,"namePrefix":"","span":1,"targets":[{"aggregation":"Last","alias":"total","displayType":"Regular","hide":false,"refId":"E","target":"alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd), \"max\")),\"total\")","textEditor":true,"valueHandler":"Text Only"},{"aggregation":"Last","alias":"in","displayType":"Regular","hide":false,"refId":"F","target":"alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_up), \"max\")),\"up\")","textEditor":true,"valueHandler":"Text Only"},{"aggregation":"Last","alias":"up","displayType":"Regular","hide":false,"refId":"A","target":"alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_in), \"max\")),\"in\")","textEditor":true,"valueHandler":"Text 
Only"},{"aggregation":"Last","alias":"out","displayType":"Regular","hide":false,"refId":"B","target":"alias(diffSeries(#F,#A), \"out\")","targetFull":"alias(diffSeries(alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_up), \"max\")),\"up\"),alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_in), \"max\")),\"in\")), \"out\")","textEditor":true,"valueHandler":"Text Only"},{"aggregation":"Last","alias":"down","crit":"3","display":true,"displayType":"Regular","hide":false,"refId":"D","target":"alias(diffSeries(#E,#F), \"down\")","targetFull":"alias(diffSeries(alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd), \"max\")),\"total\"),alias(keepLastValue(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_up), \"max\")),\"up\")), \"down\")","textEditor":true,"url":"dashboard/db/ceph-backend-storage","valueHandler":"Threshold","warn":"1"}],"timeFrom":"1m","timeShift":null,"title":"","type":"vonage-status-panel"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"decimals":0,"format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":35,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":"","minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":"maxSeries(consolidateBy(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_hosts, \"max\"))","textEditor":true}],"thresholds":"","title":"OSD Hosts","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"decimals":0,"format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":33,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":"","minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":"countSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put)","textEditor":true}],"thresholds":"","title":"RGW Hosts","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"decimals":0,"format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":34,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":"maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_mds_in)","textEditor":true}],"thresholds":"","title":"MDS","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(1, 167, 1, 1)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":null,"decimals":0,"format":"short","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"id":38,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":2,"nullPointMode":"connected","nullText":null,"postfix":"%","postfixFontSize":"40%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"hide":true,"refId":"A","target":"alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 1,\"maxSeries\")), \"raw capacity\")","textEditor":true},{"hide":true,"refId":"B","target":"alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 1,\"maxSeries\")), \"used raw\")","textEditor":true},{"refId":"C","target":"asPercent(#B,#A)","targetFull":"asPercent(alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used, 1,\"maxSeries\")), \"used 
raw\"),alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes, 1,\"maxSeries\")), \"raw capacity\"))","textEditor":true}],"thresholds":"70,90","title":"Capacity Utilization","type":"singlestat","valueFontSize":"70%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"description":"Shows the growth rate based on osd usage over the past $growth_window.","format":"decbytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":80,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"hide":true,"refId":"D","target":"alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), \"max\"),\"-$growth_window ago\")","textEditor":true},{"hide":true,"refId":"E","target":"alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\")","textEditor":true},{"refId":"C","target":"alias(diffSeries(#E, #D), 'Growth over last $growth_window')","targetFull":"alias(diffSeries(alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\"), alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), 
\"max\"),\"-$growth_window ago\")), 'Growth over last $growth_window')","textEditor":true}],"thresholds":"","title":"$growth_window Growth Rate","type":"singlestat","valueFontSize":"70%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"decimals":0,"description":"Shows the estimated number of weeks left, based on consumption over the past $growth_window.","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":81,"interval":null,"links":[],"mappingType":2,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"},{"from":"-99999","text":"N/A","to":"0"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"hide":true,"refId":"A","target":"alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), \"max\"),\"-$growth_window ago\")","textEditor":true},{"hide":true,"refId":"B","target":"alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\")","textEditor":true},{"hide":true,"refId":"C","target":"keepLastValue(diffSeries(#B, #A))","targetFull":"keepLastValue(diffSeries(alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\"), alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), 
\"max\"),\"-$growth_window ago\")))","textEditor":true},{"hide":true,"refId":"D","target":"consolidateBy(minSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_avail), \"min\")","textEditor":true},{"hide":false,"refId":"E","target":"alias(divideSeries(#D, #C), 'Weeks Left till full')","targetFull":"alias(divideSeries(consolidateBy(minSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_avail), \"min\"), keepLastValue(diffSeries(alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"1d\")), \"max\"),\"-1d ago\"), alias(consolidateBy(maxSeries(timeShift(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_bytes_used,\"7d\")), \"max\"),\"-$growth_window ago\")))), 'Weeks Left till full')","textEditor":true}],"thresholds":"","title":"Weeks Remaining","type":"singlestat","valueFontSize":"70%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"aliasColors":{"active + clean":"#01a701","collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_active_clean":"#01a701","collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_peering":"#ffa500","peering":"#0A50A1"},"cacheTimeout":null,"combine":{"label":"Others","threshold":""},"datasource":null,"fontSize":"100%","format":"short","height":"210","hideTimeOverride":true,"id":74,"interval":null,"legend":{"percentage":false,"show":true,"values":false},"legendType":"Under graph","links":[{"dashUri":"db/ceph-rados","dashboard":"Ceph - RADOS","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph - RADOS Status","type":"dashboard"}],"maxDataPoints":"","minSpan":2,"nullPointMode":"connected","pieType":"pie","span":2,"strokeWidth":"0","targets":[{"refId":"A","target":"alias(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean), \"max\"),\"active + 
clean\")","textEditor":true},{"hide":true,"refId":"B","target":"alias(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active), \"max\"),\"active\")","textEditor":true},{"refId":"C","target":"alias(diffSeries(#B,#A),\"active + degraded\")","targetFull":"alias(diffSeries(alias(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active), \"max\"),\"active\"),alias(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_active_clean), \"max\"),\"active + clean\")),\"active + degraded\")","textEditor":true},{"hide":false,"refId":"D","target":"alias(consolidateBy(maxSeries(collectd.$mon_servers.$domain.cephmetrics.gauge.$cluster_name.mon.num_pg_peering), \"max\"),\"peering\")","textEditor":true}],"timeFrom":"1m","title":"Placement Group Status","type":"grafana-piechart-panel","valueName":"current"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"At a Glance","titleSize":"h5"},{"collapse":false,"height":"230","panels":[{"cacheTimeout":null,"colorBackground":true,"colorValue":false,"colors":["rgba(251,251,251, 0.97)","rgba(255,165,0, 0.89)","rgba(255, 0, 0, 1)"],"datasource":null,"description":"Indicate whether deep scrub is running within the cluster. NB. 
If this feature is turned off, the cluster will enter a WARN state.","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":97,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":"","minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":"consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.features.deep_scrub),\"max\")","textEditor":true}],"thresholds":"1,2","title":"Deep Scrub","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"},{"op":"=","text":"INACTIVE","value":"0"},{"op":"=","text":"ACTIVE","value":"1"},{"op":"=","text":"DISABLED","value":"2"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(251,251,251, 0.97)","rgba(255,165,0, 0.89)","rgba(255, 0, 0, 1)"],"datasource":null,"decimals":1,"format":"decbytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":96,"interval":null,"links":[{"dashUri":"db/ceph-frontend","dashboard":"Ceph - Frontend","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph - Pools","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"/s","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":true},"tableColumn":"","targets":[{"refId":"A","target":"sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.recovering_bytes_per_sec,-2,\"avg\"))","textEditor":true}],"thresholds":"1,2","title":"Recovery","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"Local","decimals":0,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":9,"interval":null,"links":[{"dashUri":"db/ceph-frontend","dashboard":"Ceph - Frontend","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph Pool Performance","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":2,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"dsType":"influxdb","groupBy":[],"hide":false,"measurement":"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec","policy":"default","query":"SELECT mean(\"value\") FROM \"measurement\" WHERE $timeFilter GROUP BY time($__interval) fill(null)","rawQuery":false,"refId":"A","resultFormat":"time_series","select":[[{"params":["value"],"type":"field"}]],"tags":[],"target":"sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.op_per_sec,-2,\"maxSeries\"))","textEditor":true}],"thresholds":"","title":"Client 
IOPS","type":"singlestat","valueFontSize":"100%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"Local","decimals":1,"format":"decbytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":23,"interval":null,"links":[{"dashUri":"db/ceph-frontend","dashboard":"Ceph - Frontend","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph Pool Performance","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":2,"nullPointMode":"connected","nullText":null,"postfix":"/s","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"dsType":"influxdb","groupBy":[],"hide":false,"measurement":"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec","policy":"default","query":"SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter","rawQuery":true,"refId":"A","resultFormat":"time_series","select":[[{"params":["value"],"type":"field"}]],"tags":[],"target":"sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.bytes_sec,-2,\"maxSeries\"))","textEditor":true}],"thresholds":"","title":"Client Throughput","type":"singlestat","valueFontSize":"70%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 
0.97)"],"datasource":"Local","decimals":0,"format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":76,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"dsType":"influxdb","groupBy":[],"measurement":"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pool","policy":"default","refId":"A","resultFormat":"time_series","select":[[{"params":["value"],"type":"field"}]],"tags":[],"target":"maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_pool)","textEditor":true}],"thresholds":"","title":"Pools","type":"singlestat","valueFontSize":"70%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"decimals":0,"format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":77,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":false},"tableColumn":"","targets":[{"refId":"B","target":"alias(sumSeries(consolidateBy(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_rbds,6),\"max\")),\"# rbds\")","textEditor":true}],"thresholds":"","title":"RBDs","type":"singlestat","valueFontSize":"70%","valueMaps":[],"valueName":"current"},{"aliasColors":{"Reads":"#01a701","Writes":"#82B5D8"},"cacheTimeout":null,"combine":{"label":"Others","threshold":0},"datasource":null,"description":"Shows the read/write threshold of client IOPS serviced by the ceph cluster","fontSize":"80%","format":"none","height":"230","id":93,"interval":null,"legend":{"percentage":false,"show":true,"values":false},"legendType":"Under graph","links":[],"maxDataPoints":"","minSpan":2,"nullPointMode":"connected","pieType":"pie","span":2,"strokeWidth":1,"targets":[{"refId":"A","target":"alias(sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.read_op_per_sec,-2,\"maxSeries\")), \"Reads\")","textEditor":true},{"refId":"B","target":"alias(sumSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.write_op_per_sec,-2,\"maxSeries\")), \"Writes\")","textEditor":true}],"title":"Client Read/Write Ratio","type":"grafana-piechart-panel","valueName":"current"},{"aliasColors":{"95%ile Commit Latency":"#447EBC","Apply Latency Max":"#890F02","Commit Latency":"#447EBC"},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"description":"Shows the OSD apply and commit latency at the $percentile%ile across the cluster over the past 15 minutes","fill":0,"hideTimeOverride":true,"id":92,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[{"dashUri":"db/ceph-osd-latency","dashboard":"Ceph - OSD Latency","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph - OSD 
Latency","type":"dashboard"}],"minSpan":2,"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"refId":"A","target":"alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.apply_latency,$percentile), \"Apply Latency\")","textEditor":true},{"refId":"B","target":"alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.commitcycle_latency, $percentile), \"Commit Latency\")","textEditor":true}],"thresholds":[],"timeFrom":"15m","timeShift":null,"title":"OSD Apply vs Commit Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Performance","titleSize":"h5"},{"collapse":false,"height":"180px","panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":null,"decimals":0,"description":"CPU usage is presented based on the $percentile%ile across all OSD hosts","format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":59,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":"","minSpan":2,"nullPointMode":"connected","nullText":null,"postfix":" %","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":true},"tableColumn":"","targets":[{"refId":"A","target":"percentileOfSeries(groupByNode(collectd.$osd_servers.$domain.cpu.percent.{system,user,wait},1,\"sumSeries\"),$percentile)","textEditor":true}],"thresholds":"70,90","title":"OSD Hosts CPU Busy","type":"singlestat","valueFontSize":"100%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"Local","decimals":0,"description":"Total IOPS from all OSDs in the cluster","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":72,"interval":null,"links":[{"dashUri":"db/ceph-backend-storage","dashboard":"Ceph Backend Storage","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph OSD Host Performance","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":2,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"dsType":"influxdb","groupBy":[],"hide":false,"measurement":"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec","policy":"default","query":"SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter","rawQuery":true,"refId":"A","resultFormat":"time_series","select":[[{"params":["value"],"type":"field"}]],"tags":[],"target":"alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"IOPS\")","textEditor":true}],"thresholds":"","title":"Disk 
IOPS","type":"singlestat","valueFontSize":"100%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"Local","decimals":1,"format":"decbytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":73,"interval":null,"links":[{"dashUri":"db/ceph-backend-storage","dashboard":"Ceph Backend Storage","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph OSD Host Performance","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":2,"nullPointMode":"connected","nullText":null,"postfix":"/s","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"dsType":"influxdb","groupBy":[],"hide":false,"measurement":"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec","policy":"default","query":"SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter","rawQuery":true,"refId":"A","resultFormat":"time_series","select":[[{"params":["value"],"type":"field"}]],"tags":[],"target":"sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.bytes_per_sec)","textEditor":true}],"thresholds":"","title":"Disk Throughput","type":"singlestat","valueFontSize":"70%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 
0.97)"],"datasource":null,"decimals":0,"description":"The count of the number of disks in the cluster that are over $disk_full_threshold% full.","format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":79,"interval":null,"links":[{"dashUri":"db/ceph-backend-storage","dashboard":"Ceph Backend Storage","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph OSD Host Performance","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":"countSeries(currentAbove(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.fs_percent_used,$disk_full_threshold))","textEditor":true}],"thresholds":"","title":"Nearly Full Disks","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"0","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"Local","decimals":0,"description":"RAM Usage shows the $percentile%ile of RAM used across all OSD hosts","format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":78,"interval":null,"links":[{"dashUri":"db/ceph-backend-storage","dashboard":"Ceph Backend Storage","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Ceph OSD Host Performance","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":"","minSpan":1,"nullPointMode":"connected","nullText":null,"postfix":" %","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":1,"sparkline":{"fillColor":"rgba(46, 161, 15, 0)","full":false,"lineColor":"rgb(164, 139, 4)","show":false},"tableColumn":"","targets":[{"dsType":"influxdb","groupBy":[],"hide":false,"measurement":"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_op_per_sec","policy":"default","query":"SELECT \"value\" FROM \"collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.pools._all_.read_bytes_sec\" WHERE $timeFilter","rawQuery":true,"refId":"A","resultFormat":"time_series","select":[[{"params":["value"],"type":"field"}]],"tags":[],"target":"percentileOfSeries(collectd.$osd_servers.$domain.memory.percent.used,$percentile)","textEditor":true}],"thresholds":"","title":"RAM Util.","type":"singlestat","valueFontSize":"70%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(1, 167, 1,1)","rgba(255,165,0,1)","rgba(245, 54, 54, 0.9)"],"datasource":null,"decimals":0,"description":"This panel shows the $percentile%ile disk await time across all OSDs in the cliuster","format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":36,"interval":null,"links":[{"dashUri":"db/latency-by-server","dashboard":"Latency by Server","includeVars":true,"targetBlank":true,"title":"Latency by Server","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":"","minSpan":2,"nullPointMode":"connected","nullText":null,"postfix":" ms","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 
0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"refId":"A","target":"percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),$percentile)","textEditor":true}],"thresholds":"20,60","title":"Disk Latency","type":"singlestat","valueFontSize":"100%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":true,"colorValue":false,"colors":["rgba(1, 167, 1,1)","rgba(255,165,0,1)","rgba(245, 54, 54, 0.9)"],"datasource":null,"decimals":0,"description":"The $percentile%ile disk util value covers all OSDs in the cluster. A high value here serves as an indicator for how busy/disk bound the cluster is.","format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"id":37,"interval":null,"links":[{"dashUri":"db/disk-busy-by-server","dashboard":"Disk Busy by Server","includeVars":true,"keepTime":false,"targetBlank":true,"title":"Disk Busy by Server","type":"dashboard"}],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":"","minSpan":2,"nullPointMode":"connected","nullText":null,"postfix":" %","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.45)","full":false,"lineColor":"rgb(0, 16, 137)","show":true},"tableColumn":"","targets":[{"refId":"A","target":"percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util),$percentile)","textEditor":true}],"thresholds":"70,90","title":"Disk Utilization","type":"singlestat","valueFontSize":"100%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard 
Row","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["overview"],"templating":{"list":[{"allValue":null,"current":{"text":"ceph","value":"ceph"},"datasource":"Local","hide":0,"includeAll":false,"label":"Cluster Name","multi":false,"name":"cluster_name","options":[],"query":"collectd.*.$domain.cephmetrics.gauge.*","refresh":1,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"qct.com","value":"qct.com"},"hide":2,"includeAll":false,"label":null,"multi":false,"name":"domain","options":[{"selected":true,"text":"qct.com","value":"qct.com"}],"query":"qct.com","type":"custom"},{"allValue":null,"current":{"selected":true,"text":"95","value":"95"},"hide":0,"includeAll":false,"label":"Percentile","multi":false,"name":"percentile","options":[{"selected":false,"text":"80","value":"80"},{"selected":false,"text":"85","value":"85"},{"selected":false,"text":"90","value":"90"},{"selected":true,"text":"95","value":"95"},{"selected":false,"text":"98","value":"98"}],"query":"80,85,90,95,98","type":"custom"},{"allValue":null,"current":{"selected":true,"text":"All","value":"$__all"},"hide":2,"includeAll":true,"label":null,"multi":true,"name":"osd_servers","options":[{"selected":true,"text":"All","value":"$__all"},{"selected":false,"text":"qcttwcoec22","value":"qcttwcoec22"},{"selected":false,"text":"qcttwcoec23","value":"qcttwcoec23"},{"selected":false,"text":"qcttwcoec24","value":"qcttwcoec24"},{"selected":false,"text":"qcttwcoec25","value":"qcttwcoec25"},{"selected":false,"text":"qcttwcoec26","value":"qcttwcoec26"},{"selected":false,"text":"qcttwcoec27","value":"qcttwcoec27"},{"selected":false,"text":"qcttwcoec28","value":"qcttwcoec28"},{"selected":false,"text":"qcttwcoec29","value":"qcttwcoec29"},{"selected":false,"text":"qcttwcoec30","value":"qcttwcoec30"},{"selected":false,"text":"qcttwcoec31","value":"qcttwcoec31"},{"selected":false,"text":"qcttwcoec32","value":"qcttwcoec32"},{"selected":
false,"text":"qcttwcoec33","value":"qcttwcoec33"},{"selected":false,"text":"qcttwcoec34","value":"qcttwcoec34"},{"selected":false,"text":"qcttwcoec35","value":"qcttwcoec35"},{"selected":false,"text":"qcttwcoec36","value":"qcttwcoec36"},{"selected":false,"text":"qcttwcoec37","value":"qcttwcoec37"},{"selected":false,"text":"qcttwcoec38","value":"qcttwcoec38"},{"selected":false,"text":"qcttwcoec39","value":"qcttwcoec39"},{"selected":false,"text":"qcttwcoec40","value":"qcttwcoec40"}],"query":"qcttwcoec22,qcttwcoec23,qcttwcoec24,qcttwcoec25,qcttwcoec26,qcttwcoec27,qcttwcoec28,qcttwcoec29,qcttwcoec30,qcttwcoec31,qcttwcoec32,qcttwcoec33,qcttwcoec34,qcttwcoec35,qcttwcoec36,qcttwcoec37,qcttwcoec38,qcttwcoec39,qcttwcoec40","type":"custom"},{"allValue":null,"current":{"selected":true,"text":"All","value":"$__all"},"hide":2,"includeAll":false,"label":null,"multi":true,"name":"rgw_servers","options":[{"selected":true,"text":"All","value":"$__all"},{"selected":false,"text":"qcttwcoehd45","value":"qcttwcoehd45"},{"selected":false,"text":"qcttwcoehd46","value":"qcttwcoehd46"},{"selected":false,"text":"qcttwcoehd47","value":"qcttwcoehd47"},{"selected":false,"text":"qcttwcoehd48","value":"qcttwcoehd48"},{"selected":false,"text":"qcttwcoehd49","value":"qcttwcoehd49"},{"selected":false,"text":"qcttwcoehd50","value":"qcttwcoehd50"},{"selected":false,"text":"qcttwcoehd51","value":"qcttwcoehd51"},{"selected":false,"text":"qcttwcoehd52","value":"qcttwcoehd52"},{"selected":false,"text":"qcttwcoehd53","value":"qcttwcoehd53"},{"selected":false,"text":"qcttwcoehd54","value":"qcttwcoehd54"},{"selected":false,"text":"qcttwcoehd55","value":"qcttwcoehd55"},{"selected":false,"text":"qcttwcoehd56","value":"qcttwcoehd56"}],"query":"qcttwcoehd45,qcttwcoehd46,qcttwcoehd47,qcttwcoehd48,qcttwcoehd49,qcttwcoehd50,qcttwcoehd51,qcttwcoehd52,qcttwcoehd53,qcttwcoehd54,qcttwcoehd55,qcttwcoehd56","type":"custom"},{"allValue":null,"current":{"text":"All","value":"$__all"},"datasource":"Local","hide":2,"includeAl
l":true,"label":null,"multi":true,"name":"mon_servers","options":[],"query":"collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.mon_status.*","refresh":1,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"80","value":"80"},"hide":2,"includeAll":false,"label":null,"multi":false,"name":"disk_full_threshold","options":[{"selected":true,"text":"80","value":"80"}],"query":"80","type":"custom"},{"allValue":null,"current":{"text":"7d","value":"7d"},"hide":2,"includeAll":false,"label":null,"multi":false,"name":"growth_window","options":[{"selected":true,"text":"7d","value":"7d"}],"query":"7d","type":"custom"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Ceph - At A Glance","version":49}} \ No newline at end of file diff --git a/dashboards/current/ceph-backend-storage.json b/dashboards/current/ceph-backend-storage.json new file mode 100644 index 0000000..e3c86f9 --- /dev/null +++ b/dashboards/current/ceph-backend-storage.json @@ -0,0 +1 @@ +{"meta":{"type":"db","canSave":true,"canEdit":true,"canStar":true,"slug":"ceph-backend-storage","expires":"0001-01-01T00:00:00Z","created":"2017-07-13T06:58:30Z","updated":"2017-07-20T07:42:30Z","updatedBy":"admin","createdBy":"admin","version":8},"dashboard":{"annotations":{"list":[]},"editable":false,"gnetId":null,"graphTooltip":0,"hideControls":true,"id":2,"links":[{"asDropdown":true,"icon":"external 
link","includeVars":true,"keepTime":true,"tags":["overview"],"targetBlank":true,"title":"Shortcuts","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":250,"panels":[{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateRdYlGn","exponent":0.5,"mode":"spectrum"},"dataFormat":"timeseries","description":"The heatmap categorizes disk utilization into discrete buckets (e.g util 0-5) and shows the frequency of the number of disks that fall within that range as a color. The color chosen depends on the number of disks in the 'bucket', ranging from green (low) to red (high). Hover over a colored block to show the count of disk utilization observations at that point.","heatmap":{},"highlightCards":true,"id":53,"links":[],"minSpan":7,"span":7,"targets":[{"refId":"A","target":"collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util","textEditor":true}],"title":"Disk Drive Utilization Heatmap","tooltip":{"show":true,"showHistogram":false},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":180,"xBucketSize":"","yAxis":{"decimals":null,"format":"short","logBase":1,"max":"100","min":"0","show":true,"splitFactor":null},"yBucketNumber":null,"yBucketSize":5},{"columns":[{"text":"Current","value":"current"}],"description":"","fontSize":"100%","id":55,"links":[],"minSpan":2,"pageSize":null,"scroll":true,"showHeader":true,"sort":{"col":1,"desc":false},"span":2,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"date"},{"alias":"Host and Disk","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"Metric","thresholds":[],"type":"number","unit":"short"},{"alias":"OSD ID","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD 
HH:mm:ss","decimals":0,"pattern":"Current","thresholds":[],"type":"number","unit":"short"}],"targets":[{"refId":"A","target":"aliasByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.osd_id,1,-2)","textEditor":true}],"title":"OSD Summary","transform":"timeseries_aggregations","type":"table"},{"columns":[{"text":"Current","value":"current"}],"fontSize":"100%","hideTimeOverride":true,"id":56,"links":[],"minSpan":1,"pageSize":100,"scroll":true,"showHeader":true,"sort":{"col":null,"desc":false},"span":1,"styles":[{"alias":"OSD id","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"pattern":"Metric","sanitize":false,"thresholds":[""],"type":"string","unit":"short"},{"alias":"Status","colorMode":"cell","colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"pattern":"Current","thresholds":["0"],"type":"hidden","unit":"short"}],"targets":[{"hide":false,"refId":"B","target":"maximumAbove(currentBelow(groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.osd_state.*.up,-2,\"maxSeries\"),0.5),0)","textEditor":true}],"timeFrom":"1m","timeShift":null,"title":"OSD's Down","transform":"timeseries_aggregations","type":"table"},{"columns":[{"text":"Current","value":"current"}],"fontSize":"100%","id":57,"links":[],"minSpan":2,"pageSize":null,"scroll":true,"showHeader":true,"sort":{"col":0,"desc":true},"span":2,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"date"},{"alias":"% Full","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"pattern":"Host and Disk","thresholds":[],"type":"number","unit":"short"},{"alias":"% Full","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 
0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"Current","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"decimals":2,"pattern":"/.*/","thresholds":[],"type":"number","unit":"short"}],"targets":[{"refId":"A","target":"aliasByNode(currentAbove(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.fs_percent_used,$disk_full_threshold),1,-2)","textEditor":true}],"title":"Disks Near Full","transform":"timeseries_aggregations","type":"table"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Disk/OSD Heatmap over time","titleSize":"h5"},{"collapse":false,"height":"300","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":3,"id":54,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":6,"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"refId":"A","target":"groupByNode(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.pools.*.recovering_bytes_per_sec,-2,\"maxSeries\")","textEditor":true}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Recovery Workload by Pool","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Read Throughput":"#629E51","Write 
Throughput":"#E0752D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"Local","fill":1,"id":24,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":6,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"refId":"A","target":"alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_bytes_per_sec), \"Read Throughput\")","textEditor":true},{"refId":"B","target":"alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_bytes_per_sec), \"Write Throughput\")","textEditor":true}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Load - all OSDs","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"decbytes","label":null,"logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{"Read Latency":"#629E51","Write Latency":"#E0752D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"Local","fill":1,"id":40,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[{"dashUri":"db/latency-by-server","dashboard":"Latency by Server","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Latency by Server","type":"dashboard"}],"minSpan":6,"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"refId":"A","target":"alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.r_await),$percentile), 
\"Read Latency\")","textEditor":true},{"refId":"C","target":"alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.w_await),$percentile), \"Write Latency\")","textEditor":true}],"thresholds":[{"colorMode":"custom","fill":false,"line":true,"lineColor":"rgba(178, 0, 0, 0.29)","op":"gt","value":50}],"timeFrom":null,"timeShift":null,"title":"Disk Latency - all OSDs @ $percentile%ile","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{"disk busy %":"#3F6833"},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"id":43,"legend":{"avg":false,"current":true,"max":false,"min":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[{"dashUri":"db/disk-busy-by-server","dashboard":"Disk Busy by Server","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Disk Busy by Server","type":"dashboard"}],"minSpan":6,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"refId":"A","target":"alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.util),$percentile), \"disk busy %\")","textEditor":true}],"thresholds":[{"colorMode":"custom","fill":false,"line":true,"lineColor":"rgba(178, 0, 0, 0.29)","op":"gt","value":70}],"timeFrom":null,"timeShift":null,"title":"Overall Disk Busy at 
$percentile%ile","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"","logBase":1,"max":"100","min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"IOPS/spindle":"#3F6833"},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"id":46,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":6,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"refId":"A","target":"alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),$percentile),\"IOPS/spindle\")","textEditor":true}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"IOPS per Disk @ $percentile%ile - all OSDs","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{"IOPS":"#3F6833"},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"id":47,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"links":[{"dashUri":"db/iops-by-server","dashboard":"IOPS by Server","includeVars":true,"keepTime":true,"targetBlank":true,"title":"IOPS by 
Server","type":"dashboard"}],"minSpan":6,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"refId":"A","target":"alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"IOPS\")","textEditor":true}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Total Disk IOPS - all OSDs","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Disk/OSD Load Summary","titleSize":"h5"},{"collapse":true,"height":250,"panels":[{"aliasColors":{"CPU Busy":"#447EBC","CPU Busy @ 95%ile":"#629E51","Max CPU Busy":"#BF1B00","Max CPU Busy - all OSD Hosts":"#BF1B00"},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":3,"id":44,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":6,"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"Max CPU Busy - all OSD Hosts","fill":0,"linewidth":1}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"refId":"A","target":"alias(percentileOfSeries(groupByNode(collectd.$osd_servers.$domain.cpu.percent.{wait,interrupt,nice,user,system}, 1, \"sumSeries\"),$percentile),\"CPU Busy @ $percentile%ile\")","textEditor":true},{"refId":"B","target":"alias(maxSeries(groupByNode(collectd.$osd_servers.$domain.cpu.percent.{wait,interrupt,nice,user,system}, 1, \"sumSeries\")),\"Max CPU Busy - all OSD 
Hosts\")","textEditor":true}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Utilisation Overview - All OSD Hosts","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{"Network load (rx+tx)":"#3F6833"},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"id":49,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[{"dashUri":"db/network-usage-by-node","dashboard":"Network Usage by Node","includeVars":true,"keepTime":true,"targetBlank":true,"title":"Network Load by Node","type":"dashboard"}],"minSpan":6,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"hide":true,"refId":"A","target":"collectd.$osd_servers.$domain.interface.*.if_octets.rx"},{"hide":true,"refId":"B","target":"collectd.$osd_servers.$domain.interface.*.if_octets.tx"},{"refId":"C","target":"alias(sumSeries(#A,#B), \"Network load (rx+tx)\")","targetFull":"alias(sumSeries(collectd.$osd_servers.$domain.interface.*.if_octets.rx,collectd.$osd_servers.$domain.interface.*.if_octets.tx), \"Network load (rx+tx)\")","textEditor":true}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Load Across ALL 
OSDs","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"OSD Host CPU and Network Load","titleSize":"h5"}],"schemaVersion":14,"style":"dark","tags":[],"templating":{"list":[{"allValue":null,"current":{"selected":true,"text":"ceph","value":"ceph"},"datasource":"Local","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster_name","options":[],"query":"collectd.*.$domain.cephmetrics.gauge.*","refresh":1,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"selected":true,"text":"qct.com","value":"qct.com"},"hide":2,"includeAll":false,"label":null,"multi":false,"name":"domain","options":[{"selected":true,"text":"qct.com","value":"qct.com"}],"query":"qct.com","type":"custom"},{"allValue":null,"current":{"selected":true,"text":"95","value":"95"},"hide":0,"includeAll":false,"label":"Percentile","multi":false,"name":"percentile","options":[{"selected":false,"text":"80","value":"80"},{"selected":false,"text":"85","value":"85"},{"selected":false,"text":"90","value":"90"},{"selected":true,"text":"95","value":"95"},{"selected":false,"text":"98","value":"98"}],"query":"80,85,90,95,98","type":"custom"},{"allValue":null,"current":{"selected":true,"text":"All","value":"$__all"},"hide":2,"includeAll":true,"label":null,"multi":true,"name":"osd_servers","options":[{"selected":true,"text":"All","value":"$__all"},{"selected":false,"text":"qcttwcoec22","value":"qcttwcoec22"},{"selected":false,"text":"qcttwcoec23","value":"qcttwcoec23"},{"selected":false,"text":"qcttwcoec24","value":"qcttwcoec24"},{"selected":false,"text":"qcttwcoec25","value":"qcttwcoec25"},{
"selected":false,"text":"qcttwcoec26","value":"qcttwcoec26"},{"selected":false,"text":"qcttwcoec27","value":"qcttwcoec27"},{"selected":false,"text":"qcttwcoec28","value":"qcttwcoec28"},{"selected":false,"text":"qcttwcoec29","value":"qcttwcoec29"},{"selected":false,"text":"qcttwcoec30","value":"qcttwcoec30"},{"selected":false,"text":"qcttwcoec31","value":"qcttwcoec31"},{"selected":false,"text":"qcttwcoec32","value":"qcttwcoec32"},{"selected":false,"text":"qcttwcoec33","value":"qcttwcoec33"},{"selected":false,"text":"qcttwcoec34","value":"qcttwcoec34"},{"selected":false,"text":"qcttwcoec35","value":"qcttwcoec35"},{"selected":false,"text":"qcttwcoec36","value":"qcttwcoec36"},{"selected":false,"text":"qcttwcoec37","value":"qcttwcoec37"},{"selected":false,"text":"qcttwcoec38","value":"qcttwcoec38"},{"selected":false,"text":"qcttwcoec39","value":"qcttwcoec39"},{"selected":false,"text":"qcttwcoec40","value":"qcttwcoec40"}],"query":"qcttwcoec22,qcttwcoec23,qcttwcoec24,qcttwcoec25,qcttwcoec26,qcttwcoec27,qcttwcoec28,qcttwcoec29,qcttwcoec30,qcttwcoec31,qcttwcoec32,qcttwcoec33,qcttwcoec34,qcttwcoec35,qcttwcoec36,qcttwcoec37,qcttwcoec38,qcttwcoec39,qcttwcoec40","type":"custom"},{"allValue":null,"current":{"selected":true,"text":"80","value":"80"},"hide":2,"includeAll":false,"label":null,"multi":false,"name":"disk_full_threshold","options":[{"selected":true,"text":"80","value":"80"}],"query":"80","type":"custom"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Ceph Backend Storage","version":8}} \ No newline at end of file diff --git a/dashboards/current/ceph-rgw-workload.json b/dashboards/current/ceph-rgw-workload.json new file mode 100644 index 0000000..9c46be2 --- /dev/null +++ b/dashboards/current/ceph-rgw-workload.json @@ -0,0 +1,1744 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + 
"editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": 18, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 267, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "height": "250", + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount)", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": 
"default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum)", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "alias(divideSeries(sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum), sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount)),\"GET\")", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount)", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", 
+ "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum)", + "textEditor": true + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": "alias(divideSeries(sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum),sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount)), \"PUT\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average Request Latency - All RADOS Gateways", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": 0, + "description": "CPU Busy at $percentile%ile across all radosgw hosts", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "250", + "id": 38, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": 
"50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "percentileOfSeries(groupByNode(collectd.$rgw_servers.$domain.cpu.percent.{system,user,wait,interrupt},1,\"sumSeries\"),$percentile)", + "textEditor": true + } + ], + "thresholds": "70,90", + "title": "RGW Hosts CPU Busy", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "Failed HTTP Requests": "#0A437C" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the number of failed/aborted requests across all rados gateways during the last 10secs", + "fill": 3, + "height": "250", + "id": 152, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.failed_req, 1, \"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Failed HTTP Requests", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, 
+ "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "250", + "id": 45, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.qlen)", + "textEditor": true + } + ], + "thresholds": "", + "title": "Request Queue Length", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "height": "290", + "id": 34, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, 
+ "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "C", + "target": "groupByNode(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.{get,put}, 1, \"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Requests/sec - All RADOS Gateways", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "Total of GET/PUT HTTP traffic through all radosgw hosts", + "fill": 1, + "height": "290", + "id": 39, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "hide": false, + "refId": "C", + "target": "groupByNode(collectd.*.$domain.cephmetrics.derive.$cluster_name.rgw.{get_b,put_b}, 1, \"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Request Size - All RADOS 
Gateways", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RadosGW Overview - All Gateways", + "titleSize": "h5" + }, + { + "collapse": true, + "height": "300", + "panels": [ + { + "content": "", + "height": "150", + "id": 50, + "links": [], + "minSpan": 1, + "mode": "markdown", + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "span": 1, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180", + "id": 43, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": 
false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "sumSeries(collectd.$rgw_servers.$domain.cpu.percent.{user,system,wait,steal,softirq,interrupt})", + "textEditor": true + } + ], + "thresholds": "", + "title": "CPU Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "RAM Utilization on $rgw_servers - including a usage history covering the 12 hours", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180", + "hideTimeOverride": true, + "id": 46, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "collectd.$rgw_servers.$domain.memory.percent.used", + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "12h", + "timeShift": null, + "title": "RAM Utilization", + "type": "singlestat", + "valueFontSize": "80%", + 
"valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the total network load on the rados gw host", + "fill": 1, + "height": "180", + "id": 150, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(sumSeries(consolidateBy(collectd.$rgw_servers.$domain.interface.{en,eth,bond}*.if_octets.rx, \"sum\")),-1)", + "textEditor": true + }, + { + "refId": "B", + "target": "aliasByNode(sumSeries(consolidateBy(collectd.$rgw_servers.$domain.interface.{en,eth,bond}*.if_octets.tx, \"sum\")),-1)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "Failed HTTP Requests": "#0A437C" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the number of 
failed/aborted requests on this rados gateway during the last 10secs", + "fill": 3, + "height": "180", + "id": 151, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.failed_req, \"Failed HTTP Requests\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Failed HTTP Requests", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + 
"selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get, 'GET')", + "textEditor": true + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put, 'PUT')", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, 
+ "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM 
/collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(divideSeries(#B, #A),\"GET\")", + "targetFull": "alias(divideSeries(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_sum, collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get_initial_lat_avgcount),\"GET\")", + "textEditor": true + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount" + }, + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + } + ], + "hide": true, + "measurement": "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.rgw.get_initial_lat", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.gauge.$cluster_name.rgw.get_initial_lat/ WHERE $timeFilter GROUP BY time($interval)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + 
"select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum" + }, + { + "refId": "F", + "target": "alias(divideSeries(#E,#D), \"PUT\")", + "targetFull": "alias(divideSeries(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_sum,collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put_initial_lat_avgcount), \"PUT\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "id": 40, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "alias": "GET", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + 
], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.get$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.get_b, 'GET')", + "textEditor": true + }, + { + "alias": "PUT", + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "collectd.obj-rgw-1.storage.lab.cephmetrics.derive.ceph.rgw.get", + "policy": "default", + "query": "SELECT mean(\"value\") FROM /collectd.$rgw_name.cephmetrics.derive.ceph.rgw.put$/ WHERE $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [], + "target": "alias(collectd.$rgw_servers.$domain.cephmetrics.derive.$cluster_name.rgw.put_b, 'PUT')", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "HTTP GET/PUT Request Sizes", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": "rgw_servers", + "repeatIteration": null, + "repeatRowId": null, + 
"showTitle": true, + "title": "RGW Host : $rgw_servers", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "obj-rgw-1", + "value": "obj-rgw-1" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "rgw_servers", + "options": [ + { + "selected": true, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + ], + "query": "obj-rgw-1", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": true, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "90,95,98", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + 
"timezone": "browser", + "title": "Ceph RGW Workload", + "version": 1 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-01T05:18:58Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "ceph-rgw-workload", + "type": "db", + "updated": "2017-08-01T20:58:52Z", + "updatedBy": "admin@localhost", + "version": 1 + } +} \ No newline at end of file diff --git a/dashboards/current/disk-busy-by-server.json b/dashboards/current/disk-busy-by-server.json new file mode 100644 index 0000000..0df379e --- /dev/null +++ b/dashboards/current/disk-busy-by-server.json @@ -0,0 +1,594 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 10, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util,1,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Hosts Disk Utilization Peak", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + 
"type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All OSD Hosts", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util),$percentile),\"all disk busy @$percentile%ile\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilisation @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501621235322, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util),$percentile),\"all disk busy @$percentile%ile\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilisation @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + 
{ + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501621235322, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(group(collectd.$osd_servers.$domain.cephmetrics.gauge.$ceph_cluster.osd.*.perf.util),$percentile),\"all disk busy @$percentile%ile\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilisation @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Host's $percentile%ile Disk Utilisation", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + 
"repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "ceph_cluster", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "85", + "value": "85" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,85,90,95,98", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + 
{ + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Disk Busy by Server", + "version": 2 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-01T05:18:58Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "disk-busy-by-server", + "type": "db", + "updated": "2017-08-01T21:02:25Z", + "updatedBy": "admin@localhost", + "version": 2 + } +} \ No newline at end of file diff --git a/dashboards/current/iops-by-server.json b/dashboards/current/iops-by-server.json new file mode 100644 index 0000000..a423852 --- /dev/null +++ b/dashboards/current/iops-by-server.json @@ -0,0 +1,553 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 12, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, 
+ "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops,1,\"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Server IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All Servers by IOPS", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": 
"alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"total IOPS for all disks on $osd_servers\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Total OSD IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501622535508, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"total IOPS for all disks on $osd_servers\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Total OSD IOPS", + "tooltip": { + 
"shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501622535508, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.iops),\"total IOPS for all disks on $osd_servers\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Total OSD IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": 
true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Each OSD Node's IOPS Load", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + 
"to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "IOPS by Server", + "version": 4 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-01T05:18:58Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "iops-by-server", + "type": "db", + "updated": "2017-08-01T21:23:04Z", + "updatedBy": "admin@localhost", + "version": 4 + } +} \ No newline at end of file diff --git a/dashboards/current/latency-by-server.json b/dashboards/current/latency-by-server.json new file mode 100644 index 0000000..e932d8e --- /dev/null +++ b/dashboards/current/latency-by-server.json @@ -0,0 +1,573 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 13, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": 
"groupByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await,1,\"maxSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers OSD Hosts - Highest Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All OSD Hosts", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(maxSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),\"$osd_servers Max Latency\")", + "textEditor": true + } + ], + 
"thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501621443172, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(maxSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),\"$osd_servers Max Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + 
"format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "$osd_servers Node Details", + "type": "dashboard" + } + ], + "minSpan": 3, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1501621443172, + "repeatPanelId": 5, + "scopedVars": { + "osd_servers": { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(maxSeries(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.*.perf.await),\"$osd_servers Max Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": 
"Each OSD Host's Max Disk Latency", + "titleSize": "h5" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": true, + "text": "95", + "value": "95" + } + ], + "query": "95", + "type": "custom" 
+ } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Latency by Server", + "version": 3 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-01T05:18:58Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "latency-by-server", + "type": "db", + "updated": "2017-08-01T21:04:34Z", + "updatedBy": "admin@localhost", + "version": 3 + } +} \ No newline at end of file diff --git a/dashboards/current/network-usage-by-node.json b/dashboards/current/network-usage-by-node.json new file mode 100644 index 0000000..9d17c81 --- /dev/null +++ b/dashboards/current/network-usage-by-node.json @@ -0,0 +1,577 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": 53, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "300px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "Shows the total transmit and receive throughput to all interfaces across the Ceph hosts.", + "fill": 1, + "height": "", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "OSD Host Details", + "type": "dashboard" + } + ], + 
"minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(sumSeries(groupByNode(collectd.$osd_servers.$domain.interface.*.if_octets.*,1,\"sumSeries\")),\"OSD Hosts\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(sumSeries(groupByNode(collectd.$rgw_servers.$domain.interface.*.if_octets.*,1,\"sumSeries\")),\"RadosGW Hosts\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(sumSeries(groupByNode(collectd.$mon_servers.$domain.interface.*.if_octets.*,1,\"sumSeries\")),\"MON Hosts\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Network Load Across MON, OSD and RADOSGW Hosts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Aggregated Network Load", + "titleSize": "h5" + }, + { + "collapse": true, + "height": "250", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$mon_servers.$domain.interface.*.if_octets.*,1,\"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Ceph MON Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "MON Hosts", + "titleSize": "h5" + }, + { + "collapse": true, + "height": "250", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$osd_servers.$domain.interface.*.if_octets.*,1,\"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Host Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ 
+ { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Hosts", + "titleSize": "h5" + }, + { + "collapse": true, + "height": "250", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "groupByNode(collectd.$rgw_servers.$domain.interface.*.if_octets.*,1,\"sumSeries\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "RadosGW Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RadosGW Hosts", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + 
}, + "hide": 0, + "includeAll": true, + "label": "OSD Hostname", + "multi": true, + "name": "osd_servers", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-osd-1", + "value": "obj-osd-1" + }, + { + "selected": false, + "text": "obj-osd-2", + "value": "obj-osd-2" + }, + { + "selected": false, + "text": "obj-osd-3", + "value": "obj-osd-3" + } + ], + "query": "obj-osd-1,obj-osd-2,obj-osd-3", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "All", + "value": "$__all" + }, + "datasource": "Local", + "hide": 0, + "includeAll": true, + "label": "Monitor Host", + "multi": false, + "name": "mon_servers", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.mon_status.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "All", + "value": "$__all" + }, + "hide": 0, + "includeAll": true, + "label": "RadosGW Host", + "multi": false, + "name": "rgw_servers", + "options": [ + { + "selected": true, + "text": 
"All", + "value": "$__all" + }, + { + "selected": false, + "text": "obj-rgw-1", + "value": "obj-rgw-1" + } + ], + "query": "obj-rgw-1", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Network Usage by Node", + "version": 3 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-01T05:18:58Z", + "createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "network-usage-by-node", + "type": "db", + "updated": "2017-08-01T20:38:43Z", + "updatedBy": "admin@localhost", + "version": 3 + } +} \ No newline at end of file diff --git a/dashboards/current/osd-node-detail.json b/dashboards/current/osd-node-detail.json new file mode 100644 index 0000000..96e6265 --- /dev/null +++ b/dashboards/current/osd-node-detail.json @@ -0,0 +1 @@ +{"meta":{"type":"db","canSave":true,"canEdit":true,"canStar":true,"slug":"osd-node-detail","expires":"0001-01-01T00:00:00Z","created":"2017-07-21T02:18:32Z","updated":"2017-07-21T21:38:30Z","updatedBy":"admin@localhost","createdBy":"admin@localhost","version":7},"dashboard":{"annotations":{"list":[]},"editable":false,"gnetId":null,"graphTooltip":0,"hideControls":true,"id":11,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":125,"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":null,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"height":"160","id":9,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":"","minSpan":2,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"refId":"A","target":"sumSeries(keepLastValue(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.num_osds,6))","textEditor":true}],"thresholds":"","title":"OSDs","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"columns":[{"text":"Current","value":"current"}],"description":"","fontSize":"100%","height":"160","id":14,"links":[],"minSpan":3,"pageSize":1000,"scroll":true,"showHeader":true,"sort":{"col":1,"desc":false},"span":3,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"date"},{"alias":"Host and Disk","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"Metric","thresholds":[],"type":"number","unit":"short"},{"alias":"OSD ID","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"pattern":"Current","thresholds":[],"type":"number","unit":"short"}],"targets":[{"refId":"A","target":"aliasByNode(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.$device_id.osd_id,1,-2)","textEditor":true}],"title":"","transform":"timeseries_aggregations","type":"table"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"$osd_servers OSD 
Details","titleSize":"h5"},{"collapse":false,"height":250,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":6,"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"refId":"A","target":"aliasByNode(limit(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.$device_id.perf.util, $max_devices), -3)","textEditor":false}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"$osd_servers Disk utilisation","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"%Util","logBase":1,"max":"100","min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":6,"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"refId":"A","target":"aliasByNode(limit(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.$device_id.perf.await, $max_devices), -3)","textEditor":false}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"$osd_servers Disk 
Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"ms","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"","titleSize":"h5"},{"collapse":false,"height":250,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":6,"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"refId":"A","target":"aliasByNode(limit(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.$device_id.perf.bytes_per_sec, $max_devices), -3)","textEditor":true}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"$osd_servers Throughput by 
Disk","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"decbytes","label":"","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":6,"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"refId":"A","target":"aliasByNode(limit(collectd.$osd_servers.$domain.cephmetrics.gauge.$cluster_name.osd.$device_id.perf.iops, $max_devices), -3)"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"$osd_servers Disk IOPS","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"IOPS","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6"},{"collapse":false,"height":250,"panels":[{"aliasColors":{"interrupt":"#447EBC","steal":"#6D1F62","system":"#890F02","user":"#3F6833","wait":"#C15C17"},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"description":"Shows the CPU breakdown. 
When multiple servers are selected, only the first host's cpu data is shown","fill":3,"id":6,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":6,"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"refId":"A","target":"limit(aliasByMetric(collectd.$osd_servers.$domain.cpu.percent.{system,user,wait,steal,nice,interrupt,softirq}),7)","textEditor":true}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"$osd_servers CPU Utilisation","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"","logBase":1,"max":"100","min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"id":7,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":6,"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"refId":"A","target":"aliasByNode(limit(collectd.$osd_servers.$domain.interface.{eth,en,bond}*.if_octets.*, 40), -3, -1)","textEditor":false}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"$osd_servers Network 
Load","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":[],"templating":{"list":[{"allValue":null,"current":{"selected":true,"tags":[],"text":"All","value":"$__all"},"hide":0,"includeAll":true,"label":"OSD Host Name","multi":false,"name":"osd_servers","options":[{"selected":true,"text":"All","value":"$__all"},{"selected":false,"text":"obj-osd-1","value":"obj-osd-1"},{"selected":false,"text":"obj-osd-2","value":"obj-osd-2"},{"selected":false,"text":"obj-osd-3","value":"obj-osd-3"}],"query":"obj-osd-1,obj-osd-2,obj-osd-3","type":"custom"},{"allValue":null,"current":{"selected":true,"text":"storage.lab","value":"storage.lab"},"hide":2,"includeAll":false,"label":null,"multi":false,"name":"domain","options":[{"selected":true,"text":"storage.lab","value":"storage.lab"}],"query":"storage.lab","type":"custom"},{"allValue":null,"current":{"selected":true,"text":"ceph","value":"ceph"},"datasource":"Local","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster_name","options":[],"query":"collectd.*.$domain.cephmetrics.gauge.*","refresh":1,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"All","value":"$__all"},"datasource":"Local","hide":0,"includeAll":true,"label":"Disk 
Name","multi":true,"name":"device_id","options":[],"query":"collectd.$osd_servers.$domain.cephmetrics.gauge.ceph.osd.*","refresh":1,"regex":"/^[^\\d].*/","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"60","value":"60"},"hide":2,"includeAll":false,"label":null,"multi":false,"name":"max_devices","options":[{"selected":true,"text":"60","value":"60"}],"query":"60","type":"custom"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"OSD Node Detail","version":7}} \ No newline at end of file diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..c102b62 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,23 @@ +version: '2' + +services: + grafana: + image: docker.io/grafana/grafana:latest + container_name: grafana + ports: + - "3000:3000" + volumes: + - /opt/docker/grafana/etc:/etc/grafana:Z + - /opt/docker/grafana/data:/var/lib/grafana:Z + depends_on: + - graphite + graphite: + image: docker.io/abezhenar/graphite-centos7 + container_name: graphite + ports: + - "80:80" + - "2003:2003" + - "2004:2004" + - "7002:7002" + volumes: + - /opt/docker/graphite:/var/lib/graphite/storage/whisper:Z diff --git a/etc/collectd.conf b/etc/collectd.conf new file mode 100644 index 0000000..5c8a26d --- /dev/null +++ b/etc/collectd.conf @@ -0,0 +1,18 @@ +# Hostname "obj-mon-1.storage.lab" +BaseDir "/var/lib/collectd" +PluginDir "/usr/lib64/collectd" + +TypesDB "/usr/share/collectd/types.db" + +Interval 10 +Timeout 5 +ReadThreads 5 + + + Globals true + + +LoadPlugin "threshold" +LoadPlugin "aggregation" + +Include "/etc/collectd.d/*.conf" diff --git a/etc/collectd.d/cephmetrics.conf b/etc/collectd.d/cephmetrics.conf new file mode 100644 index 0000000..b669f2d --- /dev/null +++ 
b/etc/collectd.d/cephmetrics.conf @@ -0,0 +1,10 @@ +LoadPlugin "python" + + + ModulePath "/usr/lib64/collectd/cephmetrics" + + Import "cephmetrics" + + ClusterName "ceph" + + diff --git a/etc/collectd.d/cpu.conf b/etc/collectd.d/cpu.conf new file mode 100644 index 0000000..bd65ef0 --- /dev/null +++ b/etc/collectd.d/cpu.conf @@ -0,0 +1,17 @@ +LoadPlugin cpu + + + ValuesPercentage true + ReportByCpu false + + + + + + Instance user + FailureMax 90 + WarningMax 80 + + + + diff --git a/etc/collectd.d/memory.conf b/etc/collectd.d/memory.conf new file mode 100644 index 0000000..481950c --- /dev/null +++ b/etc/collectd.d/memory.conf @@ -0,0 +1,26 @@ +LoadPlugin memory + + + ValuesPercentage true + + + + + Plugin "memory" + Type "memory" + + GroupBy "Host" + + CalculateSum true + + + + + + + Instance used + FailureMax 90 + WarningMax 80 + + + diff --git a/etc/collectd.d/nics.conf b/etc/collectd.d/nics.conf new file mode 100644 index 0000000..7494ca1 --- /dev/null +++ b/etc/collectd.d/nics.conf @@ -0,0 +1 @@ +LoadPlugin interface diff --git a/etc/collectd.d/write_graphite.conf b/etc/collectd.d/write_graphite.conf new file mode 100644 index 0000000..b93ea61 --- /dev/null +++ b/etc/collectd.d/write_graphite.conf @@ -0,0 +1,19 @@ +LoadPlugin "write_graphite" + + + Host "graphite.storage.lab" + Port "2003" + Protocol "tcp" +# ReconnectInterval 0 + LogSendErrors true + Prefix "collectd." 
+# Postfix "collectd" + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + PreserveSeparator true + SeparateInstances true +# DropDuplicateFields true + + + diff --git a/etc/grafana/grafana.ini b/etc/grafana/grafana.ini new file mode 100644 index 0000000..e95cdc7 --- /dev/null +++ b/etc/grafana/grafana.ini @@ -0,0 +1,408 @@ +##################### Grafana Configuration Example ##################### +# +# Everything has defaults so you only need to uncomment things you want to +# change + +# possible values : production, development +; app_mode = production + +# instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty +; instance_name = ${HOSTNAME} + +#################################### Paths #################################### +[paths] +# Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) +# +;data = /var/lib/grafana +# +# Directory where grafana can store logs +# +;logs = /var/log/grafana +# +# Directory where grafana will automatically scan and look for plugins +# +;plugins = /var/lib/grafana/plugins + +# +#################################### Server #################################### +[server] +# Protocol (http, https, socket) +;protocol = http + +# The ip address to bind to, empty will bind to all interfaces +;http_addr = + +# The http port to use +;http_port = 3000 + +# The public facing domain name used to access grafana from a browser +;domain = localhost + +# Redirect to correct domain if host header does not match domain +# Prevents DNS rebinding attacks +;enforce_domain = false + +# The full public facing url you use in browser, used for redirects and emails +# If you use reverse proxy and sub path specify full url (with sub path) +;root_url = http://localhost:3000 +root_url = %(protocol)s://%(domain)s:%(http_port)s/ + +# Log web requests +;router_logging = false + +# the path relative working path +;static_root_path = public + +# enable gzip +;enable_gzip = 
false + +# https certs & key file +;cert_file = +;cert_key = + +# Unix socket path +;socket = + +#################################### Database #################################### +[database] +# You can configure the database connection by specifying type, host, name, user and password +# as seperate properties or as on string using the url propertie. + +# Either "mysql", "postgres" or "sqlite3", it's your choice +;type = sqlite3 +;host = 127.0.0.1:3306 +;name = grafana +;user = root +# If the password contains # or ; you have to wrap it with trippel quotes. Ex """#password;""" +;password = + +# Use either URL or the previous fields to configure the database +# Example: mysql://user:secret@host:port/database +;url = + +# For "postgres" only, either "disable", "require" or "verify-full" +;ssl_mode = disable + +# For "sqlite3" only, path relative to data_path setting +;path = grafana.db + +# Max conn setting default is 0 (mean not set) +;max_idle_conn = +;max_open_conn = + + +#################################### Session #################################### +[session] +# Either "memory", "file", "redis", "mysql", "postgres", default is "file" +;provider = file + +# Provider config options +# memory: not have any config yet +# file: session dir path, is relative to grafana data_path +# redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana` +# mysql: go-sql-driver/mysql dsn config string, e.g. 
`user:password@tcp(127.0.0.1:3306)/database_name` +# postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable +;provider_config = sessions + +# Session cookie name +;cookie_name = grafana_sess + +# If you use session in https only, default is false +;cookie_secure = false + +# Session life time, default is 86400 +;session_life_time = 86400 + +#################################### Data proxy ########################### +[dataproxy] + +# This enables data proxy logging, default is false +;logging = false + + +#################################### Analytics #################################### +[analytics] +# Server reporting, sends usage counters to stats.grafana.org every 24 hours. +# No ip addresses are being tracked, only simple counters to track +# running instances, dashboard and error counts. It is very helpful to us. +# Change this option to false to disable reporting. +;reporting_enabled = true + +# Set to false to disable all checks to https://grafana.net +# for new vesions (grafana itself and plugins), check is used +# in some UI views to notify that grafana or plugin update exists +# This option does not cause any auto updates, nor send any information +# only a GET request to http://grafana.com to get latest versions +;check_for_updates = true + +# Google Analytics universal tracking code, only enabled if you specify an id here +;google_analytics_ua_id = + +#################################### Security #################################### +[security] +# default admin user, created on startup +;admin_user = admin + +# default admin password, can be changed before first start of grafana, or in profile settings +;admin_password = admin + +# used for signing +;secret_key = SW2YcwTIb9zpOOhoPsMm + +# Auto-login remember days +;login_remember_days = 7 +;cookie_username = grafana_user +;cookie_remember_name = grafana_remember + +# disable gravatar profile images +;disable_gravatar = false + +# data source proxy whitelist (ip_or_domain:port 
separated by spaces) +;data_source_proxy_whitelist = + +[snapshots] +# snapshot sharing options +;external_enabled = true +;external_snapshot_url = https://snapshots-origin.raintank.io +;external_snapshot_name = Publish to snapshot.raintank.io + +# remove expired snapshot +;snapshot_remove_expired = true + +# remove snapshots after 90 days +;snapshot_TTL_days = 90 + +#################################### Users #################################### +[users] +# disable user signup / registration +;allow_sign_up = true + +# Allow non admin users to create organizations +;allow_org_create = true + +# Set to true to automatically assign new users to the default organization (id 1) +;auto_assign_org = true + +# Default role new users will be automatically assigned (if disabled above is set to true) +;auto_assign_org_role = Viewer + +# Background text for the user field on the login page +;login_hint = email or username + +# Default UI theme ("dark" or "light") +;default_theme = dark + +[auth] +# Set to true to disable (hide) the login form, useful if you use OAuth, defaults to false +;disable_login_form = false + +# Set to true to disable the signout link in the side menu. useful if you use auth.proxy, defaults to false +;disable_signout_menu = false + +#################################### Anonymous Auth ########################## +[auth.anonymous] +# enable anonymous access +;enabled = false + +# specify organization name that should be used for unauthenticated users +;org_name = Main Org. 
+ +# specify role for unauthenticated users +;org_role = Viewer + +#################################### Github Auth ########################## +[auth.github] +;enabled = false +;allow_sign_up = true +;client_id = some_id +;client_secret = some_secret +;scopes = user:email,read:org +;auth_url = https://github.com/login/oauth/authorize +;token_url = https://github.com/login/oauth/access_token +;api_url = https://api.github.com/user +;team_ids = +;allowed_organizations = + +#################################### Google Auth ########################## +[auth.google] +;enabled = false +;allow_sign_up = true +;client_id = some_client_id +;client_secret = some_client_secret +;scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email +;auth_url = https://accounts.google.com/o/oauth2/auth +;token_url = https://accounts.google.com/o/oauth2/token +;api_url = https://www.googleapis.com/oauth2/v1/userinfo +;allowed_domains = + +#################################### Generic OAuth ########################## +[auth.generic_oauth] +;enabled = false +;name = OAuth +;allow_sign_up = true +;client_id = some_id +;client_secret = some_secret +;scopes = user:email,read:org +;auth_url = https://foo.bar/login/oauth/authorize +;token_url = https://foo.bar/login/oauth/access_token +;api_url = https://foo.bar/user +;team_ids = +;allowed_organizations = + +#################################### Grafana.com Auth #################### +[auth.grafananet] +;enabled = false +;allow_sign_up = true +;client_id = some_id +;client_secret = some_secret +;scopes = user:email +;allowed_organizations = + +#################################### Auth Proxy ########################## +[auth.proxy] +;enabled = false +;header_name = X-WEBAUTH-USER +;header_property = username +;auto_sign_up = true +;ldap_sync_ttl = 60 +;whitelist = 192.168.1.1, 192.168.2.1 + +#################################### Basic Auth ########################## +[auth.basic] +;enabled = true + 
+#################################### Auth LDAP ########################## +[auth.ldap] +;enabled = false +;config_file = /etc/grafana/ldap.toml +;allow_sign_up = true + +#################################### SMTP / Emailing ########################## +[smtp] +;enabled = false +;host = localhost:25 +;user = +# If the password contains # or ; you have to wrap it with trippel quotes. Ex """#password;""" +;password = +;cert_file = +;key_file = +;skip_verify = false +;from_address = admin@grafana.localhost +;from_name = Grafana + +[emails] +;welcome_email_on_sign_up = false + +#################################### Logging ########################## +[log] +# Either "console", "file", "syslog". Default is console and file +# Use space to separate multiple modes, e.g. "console file" +;mode = console file + +# Either "trace", "debug", "info", "warn", "error", "critical", default is "info" +;level = info + +# optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug +;filters = + + +# For "console" mode only +[log.console] +;level = + +# log line format, valid options are text, console and json +;format = console + +# For "file" mode only +[log.file] +;level = + +# log line format, valid options are text, console and json +;format = text + +# This enables automated log rotate(switch of following options), default is true +;log_rotate = true + +# Max line number of single file, default is 1000000 +;max_lines = 1000000 + +# Max size shift of single file, default is 28 means 1 << 28, 256MB +;max_size_shift = 28 + +# Segment log daily, default is true +;daily_rotate = true + +# Expired days of log file(delete after max days), default is 7 +;max_days = 7 + +[log.syslog] +;level = + +# log line format, valid options are text, console and json +;format = text + +# Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used. +;network = +;address = + +# Syslog facility. 
user, daemon and local0 through local7 are valid. +;facility = + +# Syslog tag. By default, the process' argv[0] is used. +;tag = + + +#################################### AMQP Event Publisher ########################## +[event_publisher] +;enabled = false +;rabbitmq_url = amqp://localhost/ +;exchange = grafana_events + +;#################################### Dashboard JSON files ########################## +[dashboards.json] +;enabled = false +;path = /var/lib/grafana/dashboards + +#################################### Alerting ############################ +[alerting] +# Disable alerting engine & UI features +;enabled = true +# Makes it possible to turn off alert rule execution but alerting UI is visible +;execute_alerts = true + +#################################### Internal Grafana Metrics ########################## +# Metrics available at HTTP API Url /api/metrics +[metrics] +# Disable / Enable internal metrics +;enabled = true + +# Publish interval +;interval_seconds = 10 + +# Send internal metrics to Graphite +[metrics.graphite] +# Enable by setting the address setting (ex localhost:2003) +;address = +;prefix = prod.grafana.%(instance_name)s. + +#################################### Grafana.com integration ########################## +# Url used to to import dashboards directly from Grafana.com +[grafana_net] +;url = https://grafana.com + +#################################### External image storage ########################## +[external_image_storage] +# Used for uploading images to public servers so they can be included in slack/email messages. 
+# you can choose between (s3, webdav) +;provider = + +[external_image_storage.s3] +;bucket_url = +;access_key = +;secret_key = + +[external_image_storage.webdav] +;url = +;public_url = +;username = +;password = diff --git a/screenshots/archive/dashboard-2017-05-19.png b/screenshots/archive/dashboard-2017-05-19.png new file mode 100644 index 0000000..fde57c5 Binary files /dev/null and b/screenshots/archive/dashboard-2017-05-19.png differ diff --git a/screenshots/archive/dashboard-2017-05-24.png b/screenshots/archive/dashboard-2017-05-24.png new file mode 100644 index 0000000..3701904 Binary files /dev/null and b/screenshots/archive/dashboard-2017-05-24.png differ diff --git a/screenshots/archive/dashboard-2017-05-26.png b/screenshots/archive/dashboard-2017-05-26.png new file mode 100644 index 0000000..611f9d6 Binary files /dev/null and b/screenshots/archive/dashboard-2017-05-26.png differ diff --git a/screenshots/archive/dashboard-2017-05-29.png b/screenshots/archive/dashboard-2017-05-29.png new file mode 100644 index 0000000..6089f79 Binary files /dev/null and b/screenshots/archive/dashboard-2017-05-29.png differ diff --git a/screenshots/at-a-glance.png b/screenshots/at-a-glance.png new file mode 100644 index 0000000..466a879 Binary files /dev/null and b/screenshots/at-a-glance.png differ diff --git a/screenshots/ceph-backend.png b/screenshots/ceph-backend.png new file mode 100644 index 0000000..8da34f0 Binary files /dev/null and b/screenshots/ceph-backend.png differ diff --git a/screenshots/ceph-frontend.png b/screenshots/ceph-frontend.png new file mode 100644 index 0000000..cb3dee4 Binary files /dev/null and b/screenshots/ceph-frontend.png differ diff --git a/screenshots/ceph-rados.png b/screenshots/ceph-rados.png new file mode 100644 index 0000000..630d4ad Binary files /dev/null and b/screenshots/ceph-rados.png differ diff --git a/screenshots/ceph-rgw.png b/screenshots/ceph-rgw.png new file mode 100644 index 0000000..ad271c9 Binary files /dev/null and 
b/screenshots/ceph-rgw.png differ diff --git a/screenshots/disk-busy-by-server.png b/screenshots/disk-busy-by-server.png new file mode 100644 index 0000000..2b5d4a2 Binary files /dev/null and b/screenshots/disk-busy-by-server.png differ diff --git a/screenshots/disk-latency-by-server.png b/screenshots/disk-latency-by-server.png new file mode 100644 index 0000000..8bfa17a Binary files /dev/null and b/screenshots/disk-latency-by-server.png differ diff --git a/screenshots/iops-by-server.png b/screenshots/iops-by-server.png new file mode 100644 index 0000000..189ca46 Binary files /dev/null and b/screenshots/iops-by-server.png differ diff --git a/screenshots/network-load.png b/screenshots/network-load.png new file mode 100644 index 0000000..6f49966 Binary files /dev/null and b/screenshots/network-load.png differ diff --git a/screenshots/osd-node-details.png b/screenshots/osd-node-details.png new file mode 100644 index 0000000..21be086 Binary files /dev/null and b/screenshots/osd-node-details.png differ diff --git a/selinux/cephmetrics.te b/selinux/cephmetrics.te new file mode 100644 index 0000000..9e95937 --- /dev/null +++ b/selinux/cephmetrics.te @@ -0,0 +1,32 @@ +policy_module(cephmetrics, 1.0.0) + +require { + type collectd_t; + type ceph_t; + type ceph_var_run_t; + type ceph_var_lib_t; + type fixed_disk_device_t; + class unix_stream_socket connectto; + class dir read; + class file getattr; + class capability2 block_suspend; +} + +#============= collectd_t ============== + +#!!!! 
This avc can be allowed using the boolean 'daemons_enable_cluster_mode' +allow collectd_t ceph_t:unix_stream_socket connectto; +allow collectd_t ceph_var_run_t:dir read; +allow collectd_t ceph_var_lib_t:file getattr; +allow collectd_t ceph_var_lib_t:lnk_file getattr; +allow collectd_t ceph_var_lib_t:lnk_file read; +allow collectd_t fixed_disk_device_t:blk_file getattr; +allow collectd_t self:capability2 block_suspend; +allow collectd_t var_log_t:dir { add_name write }; +allow collectd_t var_log_t:file create; +corecmd_exec_shell(collectd_t) +files_list_tmp(collectd_t) +libs_exec_ldconfig(collectd_t) +libs_manage_lib_dirs(collectd_t) +libs_manage_lib_files(collectd_t) +logging_write_generic_logs(collectd_t) diff --git a/setup/add_datasource.json b/setup/add_datasource.json new file mode 100644 index 0000000..64d9ef8 --- /dev/null +++ b/setup/add_datasource.json @@ -0,0 +1,8 @@ +{ + "name":"Local", + "type":"graphite", + "url":"http://192.168.1.52", + "access":"proxy", + "basicAuth":false, + "isDefault":true +} diff --git a/tests/testosd.py b/tests/testosd.py new file mode 100644 index 0000000..53dc139 --- /dev/null +++ b/tests/testosd.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +from collectors.osd import OSDs +from collectors.common import flatten_dict + +import time + +def main(): + o = OSDs('ceph') + ctr = 0 + while ctr < 30: + + s = o.get_stats() + print(s) + print(flatten_dict(s)) + + time.sleep(1) + ctr += 1 + +if __name__ == "__main__": + main() diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..4db87b8 --- /dev/null +++ b/tox.ini @@ -0,0 +1,22 @@ +[tox] +skipsdist = True + +[testenv:ansible-lint] +install_command = pip install --upgrade {opts} {packages} +deps= + ansible-lint +commands=ansible-lint -x ANSIBLE0010,ANSIBLE0012,ANSIBLE0017 ansible/playbook.yml + +[testenv:ansible-syntax] +install_command = pip install --upgrade {opts} {packages} +deps= + ansible==2.2.3.0 +changedir=ansible +commands= + ansible-playbook -i '127.0.0.1,' 
playbook.yml --syntax-check -vv + +[testenv:flake8] +install_command = pip install --upgrade {opts} {packages} +deps= + flake8 +commands=flake8 --select=F,E9 {posargs:*.py collectors tests}