# DASHBOARD #
#############
#dashboard_enabled: False
-#dashboard_network_name: ceph-dashboard
# Choose http or https
# For https, you should set dashboard.crt/key and grafana.crt/key
#dashboard_protocol: http
# DASHBOARD #
#############
#dashboard_enabled: False
-#dashboard_network_name: ceph-dashboard
# Choose http or https
# For https, you should set dashboard.crt/key and grafana.crt/key
#dashboard_protocol: http
# DASHBOARD #
#############
dashboard_enabled: False
-dashboard_network_name: ceph-dashboard
# Choose http or https
# For https, you should set dashboard.crt/key and grafana.crt/key
dashboard_protocol: http
+++ /dev/null
-# This file is managed by ansible, don't make changes here - they will be
-# overwritten.
-[Unit]
-Description=grafana-server
-After=docker.service
-
-[Service]
-EnvironmentFile=-/etc/environment
-ExecStart=/usr/bin/docker start --attach grafana-server
-ExecStop=-/usr/bin/docker stop grafana-server
-Restart=always
-RestartSec=10s
-TimeoutStartSec=120
-TimeoutStopSec=15
-
-[Install]
-WantedBy=multi-user.target
state: stopped
failed_when: false
-- name: create docker container
- docker_container:
- name: grafana-server
- image: "{{ grafana_container_image }}"
- state: present
- # restart to allow updates
- restart: true
- restart_policy: no
- force_kill: yes
- published_ports: '3000:3000'
- detach: true
- volumes:
- - "/etc/grafana:/etc/grafana:Z"
- - "/var/lib/grafana:/var/lib/grafana:Z"
- networks:
- - name: "{{ dashboard_network_name }}"
- keep_volumes: true
- pull: true
- cpu_period: "{{ grafana_container_cpu_period }}"
- # As of ansible-2.5.2, this module doesn't support the equivalent of the
- # --cpus flag, so we must use period/quota for now
- cpu_quota: "{{ grafana_container_cpu_period * grafana_container_cpu_cores }}"
- memory: "{{ grafana_container_memory }}GB"
- memory_swap: "{{ grafana_container_memory * 2 }}GB"
- env:
- GF_INSTALL_PLUGINS: "{{ grafana_plugins|join(',') }}"
+# Refresh the image before touching the existing container. A bare "create"
+# only pulls an image that is missing locally, so without this step an
+# already cached tag would never pick up updates. Pulling first also means a
+# registry failure aborts the play before the old container is removed.
+- name: pull grafana-server container image
+  command: "{{ container_binary }} pull {{ grafana_container_image }}"
+  # The create task below always reports a change; keep this one quiet.
+  changed_when: false
+
+# Make sure we re-create the container
+# Best effort: the container may not exist yet (first deployment), so the
+# result of "rm -f" is deliberately ignored.
+- name: remove old grafana-server container
+  command: "{{ container_binary }} rm -f grafana-server"
+  changed_when: false
+  failed_when: false
+
+# The container is only created here, not started: the grafana-server systemd
+# unit shipped by this role starts it with "start --attach".
+# cpu-quota is derived from period * cores to express a CPU-count limit.
+- name: create grafana-server container
+  shell: |
+    {{ container_binary }} create --name grafana-server \
+      -v "/etc/grafana:/etc/grafana:Z" \
+      -v "/var/lib/grafana:/var/lib/grafana:Z" \
+      "--net=host" \
+      "--cpu-period={{ grafana_container_cpu_period }}" \
+      "--cpu-quota={{ grafana_container_cpu_period * grafana_container_cpu_cores }}" \
+      "--memory={{ grafana_container_memory }}GB" \
+      "--memory-swap={{ grafana_container_memory * 2 }}GB" \
+      -e "GF_INSTALL_PLUGINS={{ grafana_plugins|join(',') }}" \
+      "{{ grafana_container_image }}"
- name: ship systemd service
- copy:
+ template:
src: grafana-server.service
dest: "/etc/systemd/system/"
owner: root
# <int> org id. will default to orgId 1 if not specified
orgId: 1
# <string> url
- url: 'http://prometheus:9090'
+ url: 'http://{{ groups["grafana-server"][0] }}:9090'
# <bool> enable/disable basic auth
basicAuth: false
# <bool> mark as default datasource. Max one per org
--- /dev/null
+# This file is managed by ansible, don't make changes here - they will be
+# overwritten.
+#
+# Systemd unit template that runs the already created "grafana-server"
+# container in the foreground. The container_binary variable selects which
+# CLI under /usr/bin is used to start and stop it.
+[Unit]
+Description=grafana-server
+# The ordering after docker.service is only emitted when container_binary is
+# "docker"; for any other value no ordering dependency is rendered.
+{% if container_binary == 'docker' %}
+After=docker.service
+{% endif %}
+
+[Service]
+# Leading "-": a missing /etc/environment file is not treated as an error.
+EnvironmentFile=-/etc/environment
+# "start --attach" starts an existing container and stays attached to it, so
+# the container named grafana-server must have been created beforehand.
+ExecStart=/usr/bin/{{ container_binary }} start --attach grafana-server
+# Leading "-": a non-zero exit status of the stop command is ignored.
+ExecStop=-/usr/bin/{{ container_binary }} stop grafana-server
+# Restart the service whenever it exits, waiting 10 seconds in between.
+Restart=always
+RestartSec=10s
+TimeoutStartSec=120
+TimeoutStopSec=15
+
+[Install]
+WantedBy=multi-user.target
+++ /dev/null
-# This file is managed by ansible, don't make changes here - they will be
-# overwritten.
-[Unit]
-Description=Node Exporter
-After=docker.service
-
-[Service]
-EnvironmentFile=-/etc/environment
-ExecStart=/usr/bin/docker start --attach node-exporter
-# Make sure the cfg80211 is loaded before running the container, the node
-# exporter needs this module loaded to test for presence of wi-fi devices
-ExecStartPre=/usr/sbin/modprobe cfg80211
-ExecStop=-/usr/bin/docker stop node-exporter
-Restart=always
-RestartSec=10s
-TimeoutStartSec=120
-TimeoutStopSec=15
-
-[Install]
-WantedBy=multi-user.target
state: stopped
failed_when: false
-- name: start docker container
- docker_container:
- name: node-exporter
- image: "{{ node_exporter_container_image }}"
- state: started
- command:
- - '--path.procfs=/host/proc'
- - '--path.sysfs=/host/sys'
- - '--no-collector.timex'
- # restart to allow updates
- restart: true
- restart_policy: no
- force_kill: yes
- detach: true
- volumes:
- - '/proc:/host/proc:ro'
- - '/sys:/host/sys:ro'
- network_mode: host
- keep_volumes: true
- pull: true
+# Refresh the image before touching the existing container. A bare "run"
+# only pulls an image that is missing locally, so without this step an
+# already cached tag would never pick up updates. Pulling first also means a
+# registry failure aborts the play before the old container is removed.
+- name: pull node-exporter container image
+  command: "{{ container_binary }} pull {{ node_exporter_container_image }}"
+  # The run task below always reports a change; keep this one quiet.
+  changed_when: false
+
+# Make sure we re-create the container
+# Best effort: the container may not exist yet (first deployment), so the
+# result of "rm -f" is deliberately ignored.
+- name: remove old node-exporter container
+  command: "{{ container_binary }} rm -f node-exporter"
+  changed_when: false
+  failed_when: false
+
+# Host /proc and /sys are mounted read-only under /host and handed to the
+# exporter through --path.procfs / --path.sysfs. Everything after the image
+# name is passed to the exporter itself, not to the container CLI.
+- name: start node-exporter container
+  shell: |
+    {{ container_binary }} run --detach --name node-exporter \
+      -v /proc:/host/proc:ro -v /sys:/host/sys:ro \
+      --net=host \
+      "{{ node_exporter_container_image }}" \
+      '--path.procfs=/host/proc' \
+      '--path.sysfs=/host/sys' \
+      '--no-collector.timex'
notify: restart node-exporter service
- name: ship systemd service
- copy:
+ template:
src: node_exporter.service
dest: "/etc/systemd/system/"
owner: root
--- /dev/null
+# This file is managed by ansible, don't make changes here - they will be
+# overwritten.
+#
+# Systemd unit template that runs the already created "node-exporter"
+# container in the foreground. The container_binary variable selects which
+# CLI under /usr/bin is used to start and stop it.
+[Unit]
+Description=Node Exporter
+# The ordering after docker.service is only emitted when container_binary is
+# "docker"; for any other value no ordering dependency is rendered.
+{% if container_binary == 'docker' %}
+After=docker.service
+{% endif %}
+
+[Service]
+# Leading "-": a missing /etc/environment file is not treated as an error.
+EnvironmentFile=-/etc/environment
+# "start --attach" starts an existing container and stays attached to it, so
+# the container named node-exporter must have been created beforehand.
+ExecStart=/usr/bin/{{ container_binary }} start --attach node-exporter
+# Make sure the cfg80211 is loaded before running the container, the node
+# exporter needs this module loaded to test for presence of wi-fi devices
+# NOTE(review): there is no leading "-" here, so a failing modprobe (for
+# example when the module is not available) keeps the service from starting
+# - confirm this is intended.
+ExecStartPre=/usr/sbin/modprobe cfg80211
+# Leading "-": a non-zero exit status of the stop command is ignored.
+ExecStop=-/usr/bin/{{ container_binary }} stop node-exporter
+# Restart the service whenever it exits, waiting 10 seconds in between.
+Restart=always
+RestartSec=10s
+TimeoutStartSec=120
+TimeoutStopSec=15
+
+[Install]
+WantedBy=multi-user.target
prometheus_container_memory: 4
prometheus_data_dir: /var/lib/prometheus
prometheus_conf_dir: /etc/prometheus
-prometheus_user_id: '65534' # This is the UID used by the prom/prometheus docker image
+prometheus_user_id: '65534' # This is the UID used by the prom/prometheus container image
alertmanager_container_image: prom/alertmanager:latest
alertmanager_container_cpu_period: 100000
+++ /dev/null
-# This file is managed by ansible, don't make changes here - they will be
-# overwritten.
-[Unit]
-Description=alertmanager
-After=docker.service
-
-[Service]
-EnvironmentFile=-/etc/environment
-ExecStart=/usr/bin/docker start --attach alertmanager
-ExecStop=/usr/bin/docker stop alertmanager
-Restart=always
-RestartSec=10s
-TimeoutStartSec=120
-TimeoutStopSec=15
-
-[Install]
-WantedBy=multi-user.target
+++ /dev/null
-# This file is managed by ansible, don't make changes here - they will be
-# overwritten.
-[Unit]
-Description=prometheus
-After=docker.service
-
-[Service]
-EnvironmentFile=-/etc/environment
-ExecStart=/usr/bin/docker start --attach prometheus
-ExecStop=/usr/bin/docker stop prometheus
-Restart=always
-RestartSec=10s
-TimeoutStartSec=120
-TimeoutStopSec=15
-
-[Install]
-WantedBy=multi-user.target
state: stopped
failed_when: false
+# Refresh the image before touching the existing container. The container
+# "run" that follows only pulls an image that is missing locally, so without
+# this step an already cached tag would never pick up updates. Pulling first
+# also means a registry failure aborts the play before the old container is
+# removed.
+- name: pull alertmanager container image
+  command: "{{ container_binary }} pull {{ alertmanager_container_image }}"
+  # The run task that follows always reports a change; keep this one quiet.
+  changed_when: false
+
+# Make sure we re-create the container
+# Best effort: the container may not exist yet (first deployment), so the
+# result of "rm -f" is deliberately ignored.
+- name: remove old alertmanager container
+  command: "{{ container_binary }} rm -f alertmanager"
+  changed_when: false
+  failed_when: false
+
- name: start alertmanager container
- docker_container:
- name: alertmanager
- image: "{{ alertmanager_container_image }}"
- state: started
- command:
- - '--config.file=/etc/alertmanager/alertmanager.yml'
- - '--storage.path=/alertmanager'
- # restart to allow updates
- restart: true
- restart_policy: no
- force_kill: yes
- published_ports: '9093:9093'
- detach: true
- volumes:
- - "{{ alertmanager_conf_dir }}:/etc/alertmanager:Z"
- - "{{ alertmanager_data_dir }}:/alertmanager:Z"
- networks:
- - name: "{{ dashboard_network_name }}"
- keep_volumes: true
- pull: true
- cpu_period: "{{ alertmanager_container_cpu_period }}"
- # As of ansible-2.5.2, this module doesn't support the equivalent of the
- # --cpus flag, so we must use period/quota for now
- cpu_quota: "{{ alertmanager_container_cpu_period * alertmanager_container_cpu_cores }}"
- #memory: 0
- #memory_swap: 0
- memory: "{{ alertmanager_container_memory }}GB"
- memory_swap: "{{ alertmanager_container_memory * 2 }}GB"
+ shell: |
+ {{ container_binary }} run --detach --name alertmanager \
+ -v "{{ alertmanager_conf_dir }}:/etc/alertmanager:Z" \
+ -v "{{ alertmanager_data_dir }}:/alertmanager:Z" \
+ "--net=host" \
+ "--cpu-period={{ alertmanager_container_cpu_period }}" \
+ "--cpu-quota={{ alertmanager_container_cpu_period * alertmanager_container_cpu_cores }}" \
+ "--memory={{ alertmanager_container_memory }}GB" \
+ "--memory-swap={{ alertmanager_container_memory * 2 }}GB" \
+ "{{ alertmanager_container_image }}" \
+ "--config.file=/etc/alertmanager/alertmanager.yml" \
+ "--storage.path=/alertmanager"
notify: service handler
- name: make sure the prometheus service is down
state: stopped
failed_when: false
-- name: start prometheus docker container
- docker_container:
- name: prometheus
- image: "{{ prometheus_container_image }}"
- state: started
- command:
- - '--config.file=/etc/prometheus/prometheus.yml'
- - '--storage.tsdb.path=/prometheus'
- - '--web.external-url=http://{{ inventory_hostname }}:9090/'
- # restart to allow updates
- restart: true
- restart_policy: no
- force_kill: yes
- published_ports: '9090:9090'
- detach: true
- volumes:
- - "{{ prometheus_conf_dir }}:/etc/prometheus:Z"
- - "{{ prometheus_data_dir }}:/prometheus:Z"
- networks:
- - name: "{{ dashboard_network_name }}"
- user: "{{ prometheus_user_id }}"
- keep_volumes: true
- pull: true
- cpu_period: "{{ prometheus_container_cpu_period }}"
- # As of ansible-2.5.2, this module doesn't support the equivalent of the
- # --cpus flag, so we must use period/quota for now
- cpu_quota: "{{ prometheus_container_cpu_period * prometheus_container_cpu_cores }}"
- #memory: 0
- #memory_swap: 0
- memory: "{{ prometheus_container_memory }}GB"
- memory_swap: "{{ prometheus_container_memory * 2 }}GB"
+# Refresh the image before touching the existing container. A bare "run"
+# only pulls an image that is missing locally, so without this step an
+# already cached tag would never pick up updates. Pulling first also means a
+# registry failure aborts the play before the old container is removed.
+- name: pull prometheus container image
+  command: "{{ container_binary }} pull {{ prometheus_container_image }}"
+  # The run task below always reports a change; keep this one quiet.
+  changed_when: false
+
+# Make sure we re-create the container
+# Best effort: the container may not exist yet (first deployment), so the
+# result of "rm -f" is deliberately ignored.
+- name: remove old prometheus container
+  command: "{{ container_binary }} rm -f prometheus"
+  changed_when: false
+  failed_when: false
+
+# Configuration and TSDB data live on the host and are bind-mounted into the
+# container, which runs as prometheus_user_id. cpu-quota is derived from
+# period * cores to express a CPU-count limit. Everything after the image
+# name is passed to prometheus itself, not to the container CLI.
+- name: start prometheus container
+  shell: |
+    {{ container_binary }} run --detach --name prometheus \
+      -v "{{ prometheus_conf_dir }}:/etc/prometheus:Z" \
+      -v "{{ prometheus_data_dir }}:/prometheus:Z" \
+      "--net=host" \
+      "--user={{ prometheus_user_id }}" \
+      "--cpu-period={{ prometheus_container_cpu_period }}" \
+      "--cpu-quota={{ prometheus_container_cpu_period * prometheus_container_cpu_cores }}" \
+      "--memory={{ prometheus_container_memory }}GB" \
+      "--memory-swap={{ prometheus_container_memory * 2 }}GB" \
+      "{{ prometheus_container_image }}" \
+      "--config.file=/etc/prometheus/prometheus.yml" \
+      "--storage.tsdb.path=/prometheus" \
+      "--web.external-url=http://{{ inventory_hostname }}:9090/"
notify: service handler
- name: ship systemd services
- copy:
+ template:
src: "{{ item }}"
dest: "/etc/systemd/system/"
owner: root
--- /dev/null
+# This file is managed by ansible, don't make changes here - they will be
+# overwritten.
+#
+# Systemd unit template that runs the already created "alertmanager"
+# container in the foreground. The container_binary variable selects which
+# CLI under /usr/bin is used to start and stop it.
+[Unit]
+Description=alertmanager
+# The ordering after docker.service is only emitted when container_binary is
+# "docker"; for any other value no ordering dependency is rendered.
+{% if container_binary == 'docker' %}
+After=docker.service
+{% endif %}
+
+[Service]
+# Leading "-": a missing /etc/environment file is not treated as an error.
+EnvironmentFile=-/etc/environment
+# "start --attach" starts an existing container and stays attached to it, so
+# the container named alertmanager must have been created beforehand.
+ExecStart=/usr/bin/{{ container_binary }} start --attach alertmanager
+# Leading "-": a non-zero exit status of the stop command (for example when
+# the container is already gone) is ignored instead of marking the unit as
+# failed.
+ExecStop=-/usr/bin/{{ container_binary }} stop alertmanager
+# Restart the service whenever it exits, waiting 10 seconds in between.
+Restart=always
+RestartSec=10s
+TimeoutStartSec=120
+TimeoutStopSec=15
+
+[Install]
+WantedBy=multi-user.target
--- /dev/null
+# This file is managed by ansible, don't make changes here - they will be
+# overwritten.
+#
+# Systemd unit template that runs the already created "prometheus" container
+# in the foreground. The container_binary variable selects which CLI under
+# /usr/bin is used to start and stop it.
+[Unit]
+Description=prometheus
+# The ordering after docker.service is only emitted when container_binary is
+# "docker"; for any other value no ordering dependency is rendered.
+{% if container_binary == 'docker' %}
+After=docker.service
+{% endif %}
+
+[Service]
+# Leading "-": a missing /etc/environment file is not treated as an error.
+EnvironmentFile=-/etc/environment
+# "start --attach" starts an existing container and stays attached to it, so
+# the container named prometheus must have been created beforehand.
+ExecStart=/usr/bin/{{ container_binary }} start --attach prometheus
+# Leading "-": a non-zero exit status of the stop command (for example when
+# the container is already gone) is ignored instead of marking the unit as
+# failed.
+ExecStop=-/usr/bin/{{ container_binary }} stop prometheus
+# Restart the service whenever it exits, waiting 10 seconds in between.
+Restart=always
+RestartSec=10s
+TimeoutStartSec=120
+TimeoutStopSec=15
+
+[Install]
+WantedBy=multi-user.target
alertmanagers:
- scheme: http
static_configs:
- - targets: ['alertmanager:9093']
+ - targets: ['{{ groups["grafana-server"][0] }}:9093']