git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ansible.git/commitdiff
ceph-prometheus: add alertmanager HA config
author: Dimitri Savineau <dsavinea@redhat.com>
Thu, 13 Feb 2020 20:56:23 +0000 (15:56 -0500)
committer: Guillaume Abrioux <gabrioux@redhat.com>
Mon, 17 Feb 2020 15:18:20 +0000 (16:18 +0100)
When using multiple alertmanager nodes (via the grafana-server group),
we need to specify the other peers in the configuration.

https://prometheus.io/docs/alerting/alertmanager/#high-availability
https://github.com/prometheus/alertmanager#high-availability

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1792225
Signed-off-by: Dimitri Savineau <dsavinea@redhat.com>
(cherry picked from commit b9d975385c2dceca3b06c18d4c37eadbe9f48c92)

group_vars/all.yml.sample
group_vars/rhcs.yml.sample
roles/ceph-defaults/defaults/main.yml
roles/ceph-infra/tasks/dashboard_firewall.yml
roles/ceph-prometheus/templates/alertmanager.service.j2

diff --git a/group_vars/all.yml.sample b/group_vars/all.yml.sample
index 010d4bfbf4e8614264d95bd10f6a7bea9f06a758..34778b97ff9c38289ef158be2dceec95cfa6ea6e 100644 (file)
@@ -773,6 +773,7 @@ dummy:
 #alertmanager_data_dir: /var/lib/alertmanager
 #alertmanager_conf_dir: /etc/alertmanager
 #alertmanager_port: 9093
+#alertmanager_cluster_port: 9094
 
 
 ##################################
diff --git a/group_vars/rhcs.yml.sample b/group_vars/rhcs.yml.sample
index 94e013a439754fda97b751029f1ad24ca46e3fcc..e1a0a6f581d61185724412aa508337e975c042a7 100644 (file)
@@ -773,6 +773,7 @@ alertmanager_container_image: registry.redhat.io/openshift4/ose-prometheus-alert
 #alertmanager_data_dir: /var/lib/alertmanager
 #alertmanager_conf_dir: /etc/alertmanager
 #alertmanager_port: 9093
+#alertmanager_cluster_port: 9094
 
 
 ##################################
diff --git a/roles/ceph-defaults/defaults/main.yml b/roles/ceph-defaults/defaults/main.yml
index 3b76963ecc432e14f331cae4a30a326a6f7c4f70..9f74fc1ced1acad262b437bbbe20d3167761d76f 100644 (file)
@@ -765,6 +765,7 @@ alertmanager_container_memory: 4
 alertmanager_data_dir: /var/lib/alertmanager
 alertmanager_conf_dir: /etc/alertmanager
 alertmanager_port: 9093
+alertmanager_cluster_port: 9094
 
 
 ##################################
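diff --git a/roles/ceph-infra/tasks/dashboard_firewall.yml b/roles/ceph-infra/tasks/dashboard_firewall.yml
index f3166355da70b07853298d150b68b68fbe2768f3..d598b9331b1d5d70285dd97164b79e0df78b345b 100644 (file)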
         permanent: true
         immediate: true
         state: enabled
+
+    - name: open alertmanager cluster port
+      firewalld:
+        port: "{{ alertmanager_cluster_port }}/{{ item }}"
+        zone: "{{ ceph_dashboard_firewall_zone }}"
+        permanent: true
+        immediate: true
+        state: enabled
+      with_items:
+        - "tcp"
+        - "udp"
   when:
     - grafana_server_group_name is defined
     - grafana_server_group_name in group_names
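diff --git a/roles/ceph-prometheus/templates/alertmanager.service.j2 b/roles/ceph-prometheus/templates/alertmanager.service.j2
index 64b264e0b26c234b1e353a9d54bd5096eee97508..173146ed41b950c5b3ba996c3b654524202bc85d 100644 (file)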
@@ -22,9 +22,13 @@ ExecStart=/usr/bin/{{ container_binary }} run --rm --name=alertmanager \
   --memory-swap={{ alertmanager_container_memory * 2 }}GB \
   {{ alertmanager_container_image }} \
   --config.file=/etc/alertmanager/alertmanager.yml \
+  --cluster.listen-address={{ grafana_server_addr }}:{{ alertmanager_cluster_port }} \
+{% for peer in grafana_server_addrs|difference(grafana_server_addr) %}
+  --cluster.peer={{ peer }}:{{ alertmanager_cluster_port }} \
+{% endfor %}
   --storage.path=/alertmanager \
   --web.external-url=http://{{ ansible_fqdn }}:{{ alertmanager_port }}/ \
-  --web.listen-address=:{{ alertmanager_port }}
+  --web.listen-address={{ grafana_server_addr }}:{{ alertmanager_port }}
 ExecStop=/usr/bin/{{ container_binary }} stop alertmanager
 Restart=always
 RestartSec=10s