# if you don't want it keep the option commented
#common_single_host_mode: true
+## Handlers - restarting daemons after a config change
+# If, for whatever reason, the content of your ceph configuration changes,
+# ceph daemons will be restarted as well. At the moment, we cannot detect
+# which config option changed, so all the daemons will be restarted. This
+# restart is serialized node by node, and in between a health check is
+# performed so we do not move on to the next node until ceph is healthy
+# again.
+# Between the checks (for monitors to be in quorum and for osd pgs to be
+# clean) we have to wait. These retries and delays are configurable for
+# both monitors and osds.
+#handler_health_mon_check_retries: 5
+#handler_health_mon_check_delay: 10
+#handler_health_osd_check_retries: 40
+#handler_health_osd_check_delay: 30
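+# For example, on a larger cluster you might want to give the osds more time
+# to settle after each restart, e.g.:
+#handler_health_osd_check_retries: 60
+#handler_health_osd_check_delay: 60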
###################
# CONFIG OVERRIDE #
# if you don't want it keep the option commented
#common_single_host_mode: true
+## Handlers - restarting daemons after a config change
+# If, for whatever reason, the content of your ceph configuration changes,
+# ceph daemons will be restarted as well. At the moment, we cannot detect
+# which config option changed, so all the daemons will be restarted. This
+# restart is serialized node by node, and in between a health check is
+# performed so we do not move on to the next node until ceph is healthy
+# again.
+# Between the checks (for monitors to be in quorum and for osd pgs to be
+# clean) we have to wait. These retries and delays are configurable for
+# both monitors and osds.
+handler_health_mon_check_retries: 5
+handler_health_mon_check_delay: 10
+handler_health_osd_check_retries: 40
+handler_health_osd_check_delay: 30
###################
# CONFIG OVERRIDE #
- name: update apt cache
apt:
update-cache: yes
+ when: ansible_os_family == 'Debian'
- name: restart ceph mons
- service:
- name: ceph-mon@{{ monitor_name }}
- state: restarted
- when:
- - socket.rc == 0
- - mon_group_name in group_names
+ include: "{{ playbook_dir }}/roles/ceph-common/handlers/restart-mon.yml"
-# This does not just restart OSDs but everything else too. Unfortunately
-# at this time the ansible role does not have an OSD id list to use
-# for restarting them specifically.
- name: restart ceph osds
- service:
- name: ceph.target
- state: restarted
- when:
- - socket.rc == 0
- - osd_group_name in group_names
+ include: "{{ playbook_dir }}/roles/ceph-common/handlers/restart-osd.yml"
- name: restart ceph mdss
- service:
- name: ceph-mds@{{ mds_name }}
- state: restarted
- when:
- - socket.rc == 0
- - mds_group_name in group_names
+ include: "{{ playbook_dir }}/roles/ceph-common/handlers/restart-mds.yml"
- name: restart ceph rgws
- service:
- name: ceph-rgw@{{ ansible_hostname }}
- state: restarted
- when:
- - socketrgw.rc == 0
- - rgw_group_name in group_names
+ include: "{{ playbook_dir }}/roles/ceph-common/handlers/restart-rgw.yml"
- name: restart ceph nfss
service:
--- /dev/null
+++ b/roles/ceph-common/handlers/restart-mds.yml
+---
+- name: restart ceph mdss
+ service:
+ name: ceph-mds@{{ mds_name }}
+ state: restarted
+ # serial: 1 would be the proper solution here, but that can only be set on play level
+ # upstream issue: https://github.com/ansible/ansible/issues/12170
+ run_once: true
+ with_items: "{{ groups[mds_group_name] }}"
+ delegate_to: "{{ item }}"
+ when:
+ - socket.rc == 0
+ - mds_group_name in group_names
--- /dev/null
+++ b/roles/ceph-common/handlers/restart-mon.yml
+---
+- name: restart ceph mons
+ service:
+ name: ceph-mon@{{ monitor_name }}
+ state: restarted
+ # serial: 1 would be the proper solution here, but that can only be set on play level
+ # upstream issue: https://github.com/ansible/ansible/issues/12170
+ run_once: true
+ with_items: "{{ groups[mon_group_name] }}"
+ delegate_to: "{{ item }}"
+ when:
+ - socket.rc == 0
+ - mon_group_name in group_names
+
+- name: validate monitors
+ include: validate-mon.yml
+ when: mon_group_name in group_names
--- /dev/null
+++ b/roles/ceph-common/handlers/restart-osd.yml
+---
+# This does not just restart OSDs but everything else too. Unfortunately
+# at this time the ansible role does not have an OSD id list to use
+# for restarting them specifically.
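+# The loop below pulls the numeric osd ids out of the directory names under
+# /var/lib/ceph/osd/ (e.g. ceph-3 -> 3) and restarts each ceph-osd@<id> unit
+# one at a time, pausing 5 seconds between restarts.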
+- name: restart ceph osds
+ shell: |
+ for id in $(ls /var/lib/ceph/osd/ |grep -oh '[0-9]*'); do
+ systemctl restart ceph-osd@$id
+ sleep 5
+ done
+ # serial: 1 would be the proper solution here, but that can only be set on play level
+ # upstream issue: https://github.com/ansible/ansible/issues/12170
+ run_once: true
+ with_items: "{{ groups[osd_group_name] }}"
+ delegate_to: "{{ item }}"
+ when:
+ - socket.rc == 0
+ - osd_group_name in group_names
+
+- name: validate osds
+ include: validate-osd.yml
+ when: osd_group_name in group_names
--- /dev/null
+++ b/roles/ceph-common/handlers/restart-rgw.yml
+---
+- name: restart ceph rgws
+ service:
+ name: ceph-rgw@{{ ansible_hostname }}
+ state: restarted
+ # serial: 1 would be the proper solution here, but that can only be set on play level
+ # upstream issue: https://github.com/ansible/ansible/issues/12170
+ run_once: true
+ with_items: "{{ groups[rgw_group_name] }}"
+ delegate_to: "{{ item }}"
+ when:
+ - socketrgw.rc == 0
+ - rgw_group_name in group_names
--- /dev/null
+++ b/roles/ceph-common/handlers/validate-mon.yml
+---
+- name: wait for ceph monitor socket
+ wait_for:
+ path: "/var/run/ceph/{{ cluster }}-mon.{{ monitor_name }}.asok"
+
+- name: set mon_host_count
+ set_fact: mon_host_count={{ groups[mon_group_name] | length }}
+
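+# Run the quorum check from a monitor other than the one that was just
+# restarted; when there is only a single monitor, fall back to it.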
+- name: select a running monitor
+ set_fact: mon_host={{ item }}
+ with_items: "{{ groups[mon_group_name] }}"
+ when:
+ - item != inventory_hostname
+ - mon_host_count | int > 1
+
+- name: select first monitor if only one monitor
+ set_fact: mon_host={{ item }}
+ with_items: "{{ groups[mon_group_name][0] }}"
+ when: mon_host_count | int == 1
+
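+# Keep asking the selected monitor until the restarted monitor rejoins the
+# quorum reported by 'ceph -s', retrying with the configured
+# handler_health_mon_check_retries/delay values.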
+- name: waiting for the monitor to join the quorum...
+ shell: |
+ ceph -s --cluster {{ cluster }} | grep monmap | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }}
+ register: result
+ until: result.rc == 0
+ retries: "{{ handler_health_mon_check_retries }}"
+ delay: "{{ handler_health_mon_check_delay }}"
+ delegate_to: "{{ mon_host }}"
--- /dev/null
+++ b/roles/ceph-common/handlers/validate-osd.yml
+---
+- name: collect osds
+ shell: |
+ ls /var/lib/ceph/osd/ |grep -oh '[0-9]*'
+ register: osd_ids
+
+- name: wait for ceph osd socket(s)
+ wait_for:
+ path: "/var/run/ceph/{{ cluster }}-osd.{{ item }}.asok"
+ with_items: "{{ osd_ids.stdout_lines }}"
+
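+# The osds are considered settled once the number of active+clean pgs equals
+# the total pg count and 'ceph health' reports HEALTH_OK or HEALTH_WARN;
+# the check runs from the first monitor using the configured
+# handler_health_osd_check_retries/delay values.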
+- name: waiting for clean pgs...
+ shell: |
+ test "$(ceph --cluster {{ cluster }} pg stat | sed 's/^.*pgs://;s/active+clean.*//;s/ //')" -eq "$(ceph --cluster {{ cluster }} pg stat | sed 's/pgs.*//;s/^.*://;s/ //')" && ceph --cluster {{ cluster }} health | egrep -sq "HEALTH_OK|HEALTH_WARN"
+ register: result
+ until: result.rc == 0
+ retries: "{{ handler_health_osd_check_retries }}"
+ delay: "{{ handler_health_osd_check_delay }}"
+ delegate_to: "{{ groups[mon_group_name][0] }}"
- restart ceph osds
- restart ceph mdss
- restart ceph rgws
+ - restart ceph nfss