git.apps.os.sepia.ceph.com Git - ceph-ansible.git/commitdiff
ceph-prometheus: add alertmanager HA config
author Dimitri Savineau <dsavinea@redhat.com>
Thu, 13 Feb 2020 20:56:23 +0000 (15:56 -0500)
committer Guillaume Abrioux <gabrioux@redhat.com>
Mon, 17 Feb 2020 09:46:21 +0000 (10:46 +0100)
When using multiple alertmanager nodes (via the grafana-server group)
then we need to specify the other peers in the configuration.

https://prometheus.io/docs/alerting/alertmanager/#high-availability
https://github.com/prometheus/alertmanager#high-availability

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1792225
Signed-off-by: Dimitri Savineau <dsavinea@redhat.com>
group_vars/all.yml.sample
group_vars/rhcs.yml.sample
roles/ceph-defaults/defaults/main.yml
roles/ceph-infra/tasks/dashboard_firewall.yml
roles/ceph-prometheus/templates/alertmanager.service.j2

index 09cbc73212d29306a7e60f1e9ca4c66e2d96bfc5..25f3ff7a030d6a00dc248c6c4c1e294b257bc229 100644 (file)
@@ -777,6 +777,7 @@ dummy:
 #alertmanager_data_dir: /var/lib/alertmanager
 #alertmanager_conf_dir: /etc/alertmanager
 #alertmanager_port: 9093
+#alertmanager_cluster_port: 9094
 
 
 ##################################
index 9300845af9f28668b6038d64120f6c437de0fa16..595201bde026af57816f49c0e4d931a9240ebd44 100644 (file)
@@ -777,6 +777,7 @@ alertmanager_container_image: registry.redhat.io/openshift4/ose-prometheus-alert
 #alertmanager_data_dir: /var/lib/alertmanager
 #alertmanager_conf_dir: /etc/alertmanager
 #alertmanager_port: 9093
+#alertmanager_cluster_port: 9094
 
 
 ##################################
index 296993d44b68b792ce16ff763b4ddaee9a868a16..82ea17b75ca266b60731d7d1eabf5d11bf665a4a 100644 (file)
@@ -769,6 +769,7 @@ alertmanager_container_memory: 4
 alertmanager_data_dir: /var/lib/alertmanager
 alertmanager_conf_dir: /etc/alertmanager
 alertmanager_port: 9093
+alertmanager_cluster_port: 9094
 
 
 ##################################
index f3166355da70b07853298d150b68b68fbe2768f3..d598b9331b1d5d70285dd97164b79e0df78b345b 100644 (file)
         permanent: true
         immediate: true
         state: enabled
+
+    # Open the alertmanager cluster port in the dashboard firewall zone,
+    # once for each transport protocol listed below (tcp and udp).
+    - name: open alertmanager cluster port
+      firewalld:
+        zone: "{{ ceph_dashboard_firewall_zone }}"
+        port: "{{ alertmanager_cluster_port }}/{{ item }}"
+        state: enabled
+        permanent: true
+        immediate: true
+      with_items:
+        - tcp
+        - udp
   when:
     - grafana_server_group_name is defined
     - grafana_server_group_name in group_names
index c905cd11d631d7f892a7e954534ab41232c4fef7..2c787cb36655849441aa2c0e29b5834c61faa0fc 100644 (file)
@@ -22,9 +22,13 @@ ExecStart=/usr/bin/{{ container_binary }} run --rm --name=alertmanager \
   --memory-swap={{ alertmanager_container_memory * 2 }}GB \
   {{ alertmanager_container_image }} \
   --config.file=/etc/alertmanager/alertmanager.yml \
+  --cluster.listen-address={{ grafana_server_addr }}:{{ alertmanager_cluster_port }} \
+{# Emit one --cluster.peer flag per other alertmanager node. The local address
+   is wrapped in a list on purpose: given a bare string, difference() falls back
+   to substring matching and would also drop any peer whose address is a
+   substring of ours (e.g. 10.0.0.1 when we are 10.0.0.11). #}
+{% for peer in grafana_server_addrs|difference([grafana_server_addr]) %}
+  --cluster.peer={{ peer }}:{{ alertmanager_cluster_port }} \
+{% endfor %}
   --storage.path=/alertmanager \
   --web.external-url=http://{{ ansible_fqdn }}:{{ alertmanager_port }}/ \
-  --web.listen-address=:{{ alertmanager_port }}
+  --web.listen-address={{ grafana_server_addr }}:{{ alertmanager_port }}
 ExecStop=/usr/bin/{{ container_binary }} stop alertmanager
 KillMode=none
 Restart=always