From: Alfredo Deza
Date: Mon, 20 Jun 2016 12:43:13 +0000 (-0400)
Subject: ansible: create a graphite role
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=18844398376a450d1c2428d141b84cb359379212;p=ceph-build.git

ansible: create a graphite role

Signed-off-by: Alfredo Deza
---

diff --git a/ansible/roles/graphite/defaults/main.yml b/ansible/roles/graphite/defaults/main.yml
new file mode 100644
index 00000000..dea45f01
--- /dev/null
+++ b/ansible/roles/graphite/defaults/main.yml
@@ -0,0 +1,10 @@
+---
+# Default variables for the graphite role; override them per host or group.
+
+# Name used for both the PostgreSQL role and the database (see postgresql.yml).
+app_name: "graphite"
+# Hostname applied to the machine and mapped to 127.0.1.1 in /etc/hosts.
+fqdn: "graphite.local"
+# Number of base64 characters kept from the generated database password.
+# 30 random bytes encode to 40 characters, so 40 keeps all of them.
+db_password_length: 40
diff --git a/ansible/roles/graphite/handlers/main.yml b/ansible/roles/graphite/handlers/main.yml
new file mode 100644
index 00000000..251cf3a6
--- /dev/null
+++ b/ansible/roles/graphite/handlers/main.yml
@@ -0,0 +1,23 @@
+---
+# Handlers for the graphite role; tasks trigger them by name through `notify`.
+
+# Make systemd re-read unit files after graphite.service is (re)written.
+- name: reload systemd
+  become: true
+  command: systemctl daemon-reload
+
+# Restart the graphite web application service.
+- name: restart app
+  become: true
+  service:
+    name: graphite
+    state: restarted
+    enabled: true
+
+# Restart carbon-cache so that regenerated carbon config files take effect.
+- name: restart carbon
+  become: true
+  service:
+    name: carbon-cache
+    state: restarted
+    enabled: true
diff --git a/ansible/roles/graphite/tasks/carbon.yml b/ansible/roles/graphite/tasks/carbon.yml
new file mode 100644
index 00000000..4424f94d
--- /dev/null
+++ b/ansible/roles/graphite/tasks/carbon.yml
@@ -0,0 +1,54 @@
+---
+# Configure carbon-cache: enable the daemon, restrict accepted metrics to the
+# API-key prefix, and install the rewrite, whitelist and storage-schema files.
+
+- name: enable carbon
+  become: true
+  lineinfile:
+    dest: /etc/default/graphite-carbon
+    regexp: "^CARBON_CACHE_ENABLED=false"
+    line: "CARBON_CACHE_ENABLED=true"
+    state: present
+
+- name: enable whitelisting in carbon
+  become: true
+  lineinfile:
+    dest: /etc/carbon/carbon.conf
+    regexp: "^# USE_WHITELIST = False"
+    line: "USE_WHITELIST = True"
+    state: present
+    # with backrefs the file is left alone when the regexp does not match
+    backrefs: true
+
+- name: create the rewrite config with the secret api key
+  become: true
+  template:
+    src: ../templates/rewrite-rules.conf.j2
+    dest: "/etc/carbon/rewrite-rules.conf"
+  notify:
+    - restart carbon
+
+- name: create the whitelist/blacklist config allowing the api key only
+  become: true
+  template:
+    src: ../templates/whitelist.conf.j2
+    dest: "/etc/carbon/whitelist.conf"
+  notify:
+    - restart carbon
+
+- name: define the storage schemas
+  become: true
+  template:
+    src: ../templates/storage-schemas.conf.j2
+    dest: "/etc/carbon/storage-schemas.conf"
+  notify:
+    - restart carbon
+
+# Unconditional restart: the two lineinfile edits above notify no handler, so
+# this is what makes carbon-cache pick them up.
+- name: ensure carbon-cache is enabled and restarted
+  become: true
+  service:
+    name: carbon-cache
+    state: restarted
+    enabled: true
diff --git a/ansible/roles/graphite/tasks/main.yml b/ansible/roles/graphite/tasks/main.yml
new file mode 100644
index 00000000..e3d8b0d6
--- /dev/null
+++ b/ansible/roles/graphite/tasks/main.yml
@@ -0,0 +1,68 @@
+---
+# Entry point of the graphite role: set the hostname, install packages, expose
+# the WSGI app as an importable module, then configure carbon, systemd and
+# PostgreSQL through the included task files.
+
+- name: "Build hosts file"
+  become: true
+  lineinfile:
+    dest: /etc/hosts
+    regexp: ".*{{ fqdn }}$"
+    line: "127.0.1.1 {{ fqdn }}"
+    state: present
+
+- name: Set Hostname with hostname command
+  become: true
+  hostname:
+    name: "{{ fqdn }}"
+
+- name: update apt cache
+  become: true
+  apt:
+    update_cache: true
+
+- name: install ssl system requirements
+  become: true
+  apt:
+    name: "{{ item }}"
+    state: present
+  # a bare variable name is not expanded by current Ansible; template it
+  with_items: "{{ ssl_requirements }}"
+  tags:
+    - packages
+
+- name: install system packages
+  become: true
+  apt:
+    name: "{{ item }}"
+    state: present
+  with_items: "{{ system_packages }}"
+  tags:
+    - packages
+
+# gunicorn is started with `graphite_web:application` from this directory (see
+# graphite.service.j2), so the packaged WSGI script is copied in as a module.
+- name: copy graphite.wsgi into the graphite package as graphite_web.py
+  become: true
+  command: cp /usr/share/graphite-web/graphite.wsgi /usr/lib/python2.7/dist-packages/graphite/graphite_web.py
+  args:
+    creates: "/usr/lib/python2.7/dist-packages/graphite/graphite_web.py"
+
+- include: carbon.yml
+
+- include: systemd.yml
+  tags:
+    - systemd
+
+- include: postgresql.yml
+  tags:
+    - postgresql
+
+# Restarts on every run (state: restarted), after the includes above have
+# written local_settings.py and run the migrations.
+- name: ensure graphite is running
+  become: true
+  service:
+    name: graphite
+    state: restarted
+    enabled: true
diff --git a/ansible/roles/graphite/tasks/postgresql.yml b/ansible/roles/graphite/tasks/postgresql.yml
new file mode 100644
index 00000000..478d477e
--- /dev/null
+++ b/ansible/roles/graphite/tasks/postgresql.yml
@@ -0,0 +1,79 @@
+---
+# Prepare PostgreSQL for graphite-web: allow local password logins, create the
+# role and database, write local_settings.py and run the Django migrations.
+
+- name: ensure database service is up
+  become: true
+  service:
+    name: postgresql
+    state: started
+    enabled: true
+
+- name: allow users to connect locally
+  become: true
+  lineinfile:
+    # TODO: should not hardcode that version
+    dest: /etc/postgresql/9.5/main/pg_hba.conf
+    # dots are escaped so they only match the literal loopback address
+    regexp: '^host\s+all\s+all\s+127\.0\.0\.1/32'
+    line: 'host all all 127.0.0.1/32 md5'
+    backrefs: true
+  register: pg_hba_conf
+
+- name: restart postgresql when pg_hba.conf changed
+  become: true
+  service:
+    name: postgresql
+    state: restarted
+  when: pg_hba_conf.changed
+
+# A new password is generated on every run; the tasks below push it to both the
+# database role and local_settings.py. The slice length used to be the
+# never-defined `${length}`, which the shell expanded to nothing.
+- name: generate pseudo-random password for the database connection
+  shell: python -c "exec 'import os; print os.urandom(30).encode(\'base64\')[:{{ db_password_length }}]'"
+  register: db_password
+  changed_when: false
+
+- name: make {{ app_name }} user
+  become: true
+  become_user: postgres
+  postgresql_user:
+    name: "{{ app_name }}"
+    password: "{{ db_password.stdout }}"
+    role_attr_flags: SUPERUSER
+    login_user: postgres
+
+- name: Make {{ app_name }} database
+  become: true
+  become_user: postgres
+  postgresql_db:
+    name: "{{ app_name }}"
+    owner: "{{ app_name }}"
+    state: present
+    login_user: postgres
+
+- name: ensure database service is up
+  become: true
+  service:
+    name: postgresql
+    state: started
+    enabled: true
+
+- name: create the config file with the db password
+  become: true
+  template:
+    src: ../templates/local_settings.py.j2
+    dest: "/etc/graphite/local_settings.py"
+  notify:
+    - restart app
+
+# there is a bug where if you don't migrate auth first only it will fail
+# with "ProgrammingError: relation "auth_user" does not exist"
+- name: run migrate for auth first
+  become: true
+  command: graphite-manage migrate --noinput auth
+
+- name: run migrate to ensure database schema
+  become: true
+  command: graphite-manage migrate --noinput
diff --git a/ansible/roles/graphite/tasks/systemd.yml b/ansible/roles/graphite/tasks/systemd.yml
new file mode 100644
index 00000000..c702e71d
--- /dev/null
+++ b/ansible/roles/graphite/tasks/systemd.yml
@@ -0,0 +1,31 @@
+---
+# Install the graphite systemd unit and make sure the service is running.
+
+- name: ensure /var/log/graphite dir exists
+  become: true
+  file:
+    path: /var/log/graphite
+    state: directory
+    owner: _graphite
+    group: _graphite
+    recurse: true
+
+- name: install the systemd unit file for graphite
+  become: true
+  template:
+    src: systemd/graphite.service.j2
+    dest: /etc/systemd/system/graphite.service
+  notify:
+    - reload systemd
+
+# Handlers normally run at the end of the play; run the pending ones now so
+# systemd has re-read the unit file before the service is started below.
+- meta: flush_handlers
+
+- name: ensure graphite is enabled and running
+  become: true
+  service:
+    name: graphite
+    # `started` matches the other service tasks in this role
+    state: started
+    enabled: true
diff --git a/ansible/roles/graphite/templates/local_settings.py.j2 b/ansible/roles/graphite/templates/local_settings.py.j2
new file mode 100644
index 00000000..72b111bf
--- /dev/null
+++ b/ansible/roles/graphite/templates/local_settings.py.j2
@@ -0,0 +1,223 @@
+# {{ ansible_managed }}
+## Graphite local_settings.py
+# Edit this file to customize the default Graphite webapp settings
+#
+# Additional customizations to Django settings can be added to this file as well
+
+#####################################
+#       General Configuration       #
+#####################################
+# Set this to a long, random unique string to use as a secret key for this
+# install. This key is used for salting of hashes used in auth tokens,
+# CSRF middleware, cookie storage, etc. This should be set identically among
+# instances if used behind a load balancer.
+#SECRET_KEY = 'UNSAFE_DEFAULT'
+
+# In Django 1.5+ set this to the list of hosts your graphite instances is
+# accessible as. See:
+# https://docs.djangoproject.com/en/dev/ref/settings/#std:setting-ALLOWED_HOSTS
+#ALLOWED_HOSTS = [ '*' ]
+
+# Set your local timezone (Django's default is America/Chicago)
+# If your graphs appear to be offset by a couple hours then this probably
+# needs to be explicitly set to your local timezone.
+#TIME_ZONE = 'America/Los_Angeles' + +# Override this to provide documentation specific to your Graphite deployment +#DOCUMENTATION_URL = "http://graphite.readthedocs.org/" + +# Logging +# True see: https://answers.launchpad.net/graphite/+question/159731 +LOG_RENDERING_PERFORMANCE = True +LOG_CACHE_PERFORMANCE = True +LOG_METRIC_ACCESS = True + +# Enable full debug page display on exceptions (Internal Server Error pages) +#DEBUG = True + +# If using RRD files and rrdcached, set to the address or socket of the daemon +#FLUSHRRDCACHED = 'unix:/var/run/rrdcached.sock' + +# This lists the memcached servers that will be used by this webapp. +# If you have a cluster of webapps you should ensure all of them +# have the *exact* same value for this setting. That will maximize cache +# efficiency. Setting MEMCACHE_HOSTS to be empty will turn off use of +# memcached entirely. +# +# You should not use the loopback address (127.0.0.1) here if using clustering +# as every webapp in the cluster should use the exact same values to prevent +# unneeded cache misses. Set to [] to disable caching of images and fetched data +#MEMCACHE_HOSTS = ['10.10.10.10:11211', '10.10.10.11:11211', '10.10.10.12:11211'] +#DEFAULT_CACHE_DURATION = 60 # Cache images and data for 1 minute + + +##################################### +# Filesystem Paths # +##################################### +# Change only GRAPHITE_ROOT if your install is merely shifted from /opt/graphite +# to somewhere else +GRAPHITE_ROOT = '/usr/share/graphite-web' + +# Most installs done outside of a separate tree such as /opt/graphite will only +# need to change these three settings. Note that the default settings for each +# of these is relative to GRAPHITE_ROOT +CONF_DIR = '/etc/graphite' +STORAGE_DIR = '/var/lib/graphite/whisper' +CONTENT_DIR = '/usr/share/graphite-web/static' + +# To further or fully customize the paths, modify the following. 
Note that the +# default settings for each of these are relative to CONF_DIR and STORAGE_DIR +# +## Webapp config files +#DASHBOARD_CONF = '/opt/graphite/conf/dashboard.conf' +#GRAPHTEMPLATES_CONF = '/opt/graphite/conf/graphTemplates.conf' + +## Data directories +# NOTE: If any directory is unreadable in DATA_DIRS it will break metric browsing +WHISPER_DIR = '/var/lib/graphite/whisper' +#RRD_DIR = '/opt/graphite/storage/rrd' +#DATA_DIRS = [WHISPER_DIR, RRD_DIR] # Default: set from the above variables +LOG_DIR = '/var/log/graphite' +INDEX_FILE = '/var/lib/graphite/search_index' # Search index file + + +##################################### +# Email Configuration # +##################################### +# This is used for emailing rendered Graphs +# Default backend is SMTP +#EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' +#EMAIL_HOST = 'localhost' +#EMAIL_PORT = 25 +#EMAIL_HOST_USER = '' +#EMAIL_HOST_PASSWORD = '' +#EMAIL_USE_TLS = False +# To drop emails on the floor, enable the Dummy backend: +#EMAIL_BACKEND = 'django.core.mail.backends.dummy.EmailBackend' + + +##################################### +# Authentication Configuration # +##################################### +## LDAP / ActiveDirectory authentication setup +#USE_LDAP_AUTH = True +#LDAP_SERVER = "ldap.mycompany.com" +#LDAP_PORT = 389 +# OR +#LDAP_URI = "ldaps://ldap.mycompany.com:636" +#LDAP_SEARCH_BASE = "OU=users,DC=mycompany,DC=com" +#LDAP_BASE_USER = "CN=some_readonly_account,DC=mycompany,DC=com" +#LDAP_BASE_PASS = "readonly_account_password" +#LDAP_USER_QUERY = "(username=%s)" #For Active Directory use "(sAMAccountName=%s)" +# +# If you want to further customize the ldap connection options you should +# directly use ldap.set_option to set the ldap module's global options. 
+# For example: +# +#import ldap +#ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_ALLOW) +#ldap.set_option(ldap.OPT_X_TLS_CACERTDIR, "/etc/ssl/ca") +#ldap.set_option(ldap.OPT_X_TLS_CERTFILE, "/etc/ssl/mycert.pem") +#ldap.set_option(ldap.OPT_X_TLS_KEYFILE, "/etc/ssl/mykey.pem") +# See http://www.python-ldap.org/ for further details on these options. + +## REMOTE_USER authentication. See: https://docs.djangoproject.com/en/dev/howto/auth-remote-user/ +#USE_REMOTE_USER_AUTHENTICATION = True + +# Override the URL for the login link (e.g. for django_openid_auth) +#LOGIN_URL = '/account/login' + + +########################## +# Database Configuration # +########################## +# By default sqlite is used. If you cluster multiple webapps you will need +# to setup an external database (such as MySQL) and configure all of the webapp +# instances to use the same database. Note that this database is only used to store +# Django models such as saved graphs, dashboards, user preferences, etc. +# Metric data is not stored here. 
+# +# DO NOT FORGET TO RUN 'manage.py syncdb' AFTER SETTING UP A NEW DATABASE +# +# The following built-in database engines are available: +# django.db.backends.postgresql # Removed in Django 1.4 +# django.db.backends.postgresql_psycopg2 +# django.db.backends.mysql +# django.db.backends.sqlite3 +# django.db.backends.oracle +# +# The default is 'django.db.backends.sqlite3' with file 'graphite.db' +# located in STORAGE_DIR +# +DATABASES = { + 'default': { + 'NAME': '{{ app_name }}', + 'ENGINE': 'django.db.backends.postgresql_psycopg2', + 'USER': '{{ app_name }}', + 'PASSWORD': '{{ db_password.stdout }}', + 'HOST': 'localhost', + 'PORT': '' + } +} + + + +######################### +# Cluster Configuration # +######################### +# (To avoid excessive DNS lookups you want to stick to using IP addresses only in this entire section) +# +# This should list the IP address (and optionally port) of the webapp on each +# remote server in the cluster. These servers must each have local access to +# metric data. Note that the first server to return a match for a query will be +# used. +#CLUSTER_SERVERS = ["10.0.2.2:80", "10.0.2.3:80"] + +## These are timeout values (in seconds) for requests to remote webapps +#REMOTE_STORE_FETCH_TIMEOUT = 6 # Timeout to fetch series data +#REMOTE_STORE_FIND_TIMEOUT = 2.5 # Timeout for metric find requests +#REMOTE_STORE_RETRY_DELAY = 60 # Time before retrying a failed remote webapp +#REMOTE_STORE_USE_POST = False # Use POST instead of GET for remote requests +#REMOTE_FIND_CACHE_DURATION = 300 # Time to cache remote metric find results + +## Prefetch cache +# set to True to fetch all metrics using a single http request per remote server +# instead of one http request per target, per remote server. +# Especially useful when generating graphs with more than 4-5 targets or if +# there's significant latency between this server and the backends. 
(>20ms) +#REMOTE_PREFETCH_DATA = False + +# During a rebalance of a consistent hash cluster, after a partition event on a replication > 1 cluster, +# or in other cases we might receive multiple TimeSeries data for a metric key. Merge them together rather +# than choosing the "most complete" one (pre-0.9.14 behaviour). +#REMOTE_STORE_MERGE_RESULTS = True + +## Remote rendering settings +# Set to True to enable rendering of Graphs on a remote webapp +#REMOTE_RENDERING = True +# List of IP (and optionally port) of the webapp on each remote server that +# will be used for rendering. Note that each rendering host should have local +# access to metric data or should have CLUSTER_SERVERS configured +#RENDERING_HOSTS = [] +#REMOTE_RENDER_CONNECT_TIMEOUT = 1.0 + +# If you are running multiple carbon-caches on this machine (typically behind a relay using +# consistent hashing), you'll need to list the ip address, cache query port, and instance name of each carbon-cache +# instance on the local machine (NOT every carbon-cache in the entire cluster). The default cache query port is 7002 +# and a common scheme is to use 7102 for instance b, 7202 for instance c, etc. 
+# +# You *should* use 127.0.0.1 here in most cases +#CARBONLINK_HOSTS = ["127.0.0.1:7002:a", "127.0.0.1:7102:b", "127.0.0.1:7202:c"] +#CARBONLINK_TIMEOUT = 1.0 +# Using 'query-bulk' queries for carbon +# It's more effective, but python-carbon 0.9.13 (or latest from 0.9.x branch) is required +# See https://github.com/graphite-project/carbon/pull/132 for details +#CARBONLINK_QUERY_BULK = False + +##################################### +# Additional Django Settings # +##################################### +# Uncomment the following line for direct access to Django settings such as +# MIDDLEWARE_CLASSES or APPS +#from graphite.app_settings import * + diff --git a/ansible/roles/graphite/templates/rewrite-rules.conf.j2 b/ansible/roles/graphite/templates/rewrite-rules.conf.j2 new file mode 100644 index 00000000..0848692e --- /dev/null +++ b/ansible/roles/graphite/templates/rewrite-rules.conf.j2 @@ -0,0 +1,22 @@ +# {{ ansible_managed }} +# This file defines regular expression patterns that can be used to +# rewrite metric names in a search & replace fashion. It consists of two +# sections, [pre] and [post]. The rules in the pre section are applied to +# metric names as soon as they are received. The post rules are applied +# after aggregation has taken place. +# +# The general form of each rule is as follows: +# +# regex-pattern = replacement-text +# +# For example: +# +# [post] +# _sum$ = +# _avg$ = +# +# These rules would strip off a suffix of _sum or _avg from any metric names +# after aggregation. 
+ +[post] +^{{ graphite_api_key }} = diff --git a/ansible/roles/graphite/templates/storage-schemas.conf.j2 b/ansible/roles/graphite/templates/storage-schemas.conf.j2 new file mode 100644 index 00000000..d77df1ea --- /dev/null +++ b/ansible/roles/graphite/templates/storage-schemas.conf.j2 @@ -0,0 +1,5 @@ +# {{ ansible_managed }} +# +[default] +pattern = .* +retentions = 5s:30d,1m:120d,10m:1y diff --git a/ansible/roles/graphite/templates/systemd/80-graphite.preset.j2 b/ansible/roles/graphite/templates/systemd/80-graphite.preset.j2 new file mode 100644 index 00000000..02a48e0e --- /dev/null +++ b/ansible/roles/graphite/templates/systemd/80-graphite.preset.j2 @@ -0,0 +1,3 @@ +# graphite web service + +enable graphite.service diff --git a/ansible/roles/graphite/templates/systemd/graphite.service.j2 b/ansible/roles/graphite/templates/systemd/graphite.service.j2 new file mode 100644 index 00000000..ac0faf4d --- /dev/null +++ b/ansible/roles/graphite/templates/systemd/graphite.service.j2 @@ -0,0 +1,15 @@ +# {{ ansible_managed }} +[Unit] +Description=graphite gunicorn service +After=network.target + +[Service] +Type=simple +ExecStart=/usr/bin/gunicorn -b 127.0.0.1:8000 -w 10 -t 30 graphite_web:application +User=_graphite +WorkingDirectory=/usr/lib/python2.7/dist-packages/graphite +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target diff --git a/ansible/roles/graphite/templates/whitelist.conf.j2 b/ansible/roles/graphite/templates/whitelist.conf.j2 new file mode 100644 index 00000000..63825ffe --- /dev/null +++ b/ansible/roles/graphite/templates/whitelist.conf.j2 @@ -0,0 +1,7 @@ +# {{ ansible_managed }} +# This file takes a single regular expression per line +# If USE_WHITELIST is set to True in carbon.conf, only metrics received which +# match one of these expressions will be persisted. If this file is empty or +# missing, all metrics will pass through. 
+# This file is reloaded automatically when changes are made
+^{{ graphite_api_key }}.*
diff --git a/ansible/roles/graphite/vars/main.yml b/ansible/roles/graphite/vars/main.yml
new file mode 100644
index 00000000..07eabeaa
--- /dev/null
+++ b/ansible/roles/graphite/vars/main.yml
@@ -0,0 +1,25 @@
+---
+
+system_packages:  # looped over by the "install system packages" task
+  - grafana
+  - graphite-web
+  - graphite-api
+  - graphite-carbon
+  - git
+  - g++
+  - gcc
+  - libpq-dev
+  - postgresql
+  - postgresql-common
+  - postgresql-contrib
+  - python-psycopg2
+  - nginx
+  - vim
+  # needed for the ansible apt_repository module
+  - python-apt
+  - python
+  - gunicorn
+
+ssl_requirements:  # looped over by the "install ssl system requirements" task
+  - openssl
+  - libssl-dev