From 5a5e185e111eaa8e9a6f1843b6b085cde7415be0 Mon Sep 17 00:00:00 2001 From: Ivan Font Date: Wed, 5 Oct 2016 21:32:38 -0700 Subject: [PATCH] Reworked purge cluster playbook - Separated out one large playbook into multiple playbooks to run host-type by host-type i.e. mdss, rgws, rbdmirrors, nfss, osds, mons. - Combined common tasks into one shared task for all hosts where applicable - Fixed various bugs Signed-off-by: Ivan Font --- infrastructure-playbooks/purge-cluster.yml | 544 ++++++++++++++------- 1 file changed, 375 insertions(+), 169 deletions(-) diff --git a/infrastructure-playbooks/purge-cluster.yml b/infrastructure-playbooks/purge-cluster.yml index 9c8250366..bfe310d16 100644 --- a/infrastructure-playbooks/purge-cluster.yml +++ b/infrastructure-playbooks/purge-cluster.yml @@ -13,6 +13,7 @@ - name: confirm whether user really meant to purge the cluster hosts: localhost + gather_facts: false vars_prompt: - name: ireallymeanit @@ -30,90 +31,52 @@ invoking the playbook" when: ireallymeanit != 'yes' -- name: stop ceph cluster - hosts: - - mons - - osds - - mdss - - rgws - - nfss - become: yes +- name: gather facts and check if using systemd vars: - osd_group_name: osds - mon_group_name: mons - rgw_group_name: rgws - mds_group_name: mdss - nfs_group_name: nfss + mon_group_name: mons + osd_group_name: osds + mds_group_name: mdss + rgw_group_name: rgws rbdmirror_group_name: rbdmirrors + nfs_group_name: nfss -# When set to true both groups of packages are purged. -# This can cause problem with qemu-kvm - purge_all_packages: true + hosts: + - "{{ mon_group_name }}" + - "{{ osd_group_name }}" + - "{{ mds_group_name }}" + - "{{ rgw_group_name }}" + - "{{ rbdmirror_group_name }}" + - "{{ nfs_group_name }}" -# When set to true and raw _multi_journal is used then block devices are also zapped - zap_block_devs: true + become: true - ceph_packages: - - ceph - - ceph-common - - ceph-fs-common - - ceph-fuse - - ceph-mds - - ceph-release - - ceph-radosgw + tasks: + - name: are we using systemd + shell: "if [ -d /usr/lib/systemd ] ; then find /usr/lib/systemd/system -name 'ceph*' | wc -l ; else echo 0 ; fi" + register: systemd_unit_files - ceph_remaining_packages: - - libcephfs1 - - librados2 - - libradosstriper1 - - librbd1 - - python-cephfs - - python-rados - - python-rbd - cluster: ceph # name of the cluster - monitor_name: "{{ ansible_hostname }}" - mds_name: "{{ ansible_hostname }}" - osd_auto_discovery: false +- name: purge ceph mds cluster + vars: + mds_group_name: mdss - handlers: - - name: restart machine - shell: sleep 2 && shutdown -r now "Ansible updates triggered" - async: 1 - poll: 0 - ignore_errors: true + hosts: + - "{{ mds_group_name }}" - - name: wait for server to boot - become: false - local_action: wait_for port=22 host={{ inventory_hostname }} state=started delay=10 timeout=500 + gather_facts: false # Already gathered previously - - name: remove data - file: - path: /var/lib/ceph - state: absent + become: true tasks: - - name: check for a device list - fail: - msg: "OSD automatic discovery was detected, purge cluster does not support this scenario. If you want to purge the cluster, manually provide the list of devices in group_vars/osds using the devices variable." 
- when: - osd_group_name in group_names and - devices is not defined and - osd_auto_discovery - - - name: get osd numbers - shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | cut -d '-' -f 2 ; fi" - register: osd_ids - changed_when: false - - - name: are we using systemd - shell: "if [ -d /usr/lib/systemd ] ; then find /usr/lib/systemd/system -name 'ceph*' | wc -l ; else echo 0 ; fi" - register: systemd_unit_files - -# after Hammer release + - include_vars: ../roles/ceph-common/defaults/main.yml + - include_vars: ../roles/ceph-mds/defaults/main.yml + - include_vars: ../group_vars/all + failed_when: false + - include_vars: ../group_vars/mdss + failed_when: false - name: stop ceph.target with systemd service: @@ -124,53 +87,107 @@ ansible_os_family == 'RedHat' and systemd_unit_files.stdout != "0" - - name: stop ceph-osd with systemd + - name: stop ceph mdss with systemd service: - name: ceph-osd@{{item}} + name: ceph-mds@{{ ansible_hostname }} state: stopped enabled: no - with_items: "{{ osd_ids.stdout_lines }}" when: ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - osd_group_name in group_names + systemd_unit_files.stdout != "0" - - name: stop ceph mons with systemd - service: - name: ceph-mon@{{ ansible_hostname }} - state: stopped - enabled: no + - name: stop ceph mdss + shell: "service ceph status mds ; if [ $? == 0 ] ; then service ceph stop mds ; else echo ; fi" when: - ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - mon_group_name in group_names + ansible_os_family == 'RedHat' - - name: stop ceph mdss with systemd +# Ubuntu 14.04 + - name: stop ceph mdss on ubuntu + command: initctl stop ceph-mds cluster={{ cluster }} id={{ ansible_hostname }} + failed_when: false + when: + ansible_distribution == 'Ubuntu' + + +- name: purge ceph rgw cluster + + vars: + rgw_group_name: rgws + + hosts: + - "{{ rgw_group_name }}" + + gather_facts: false # Already gathered previously + + become: true + + tasks: + - include_vars: ../roles/ceph-common/defaults/main.yml + - include_vars: ../roles/ceph-rgw/defaults/main.yml + - include_vars: ../group_vars/all + failed_when: false + - include_vars: ../group_vars/rgws + failed_when: false + + - name: stop ceph.target with systemd service: - name: ceph-mds@{{ ansible_hostname }} + name: ceph.target state: stopped + enabled: no when: ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - mds_group_name in group_names + systemd_unit_files.stdout != "0" - name: stop ceph rgws with systemd service: name: ceph-radosgw@rgw.{{ ansible_hostname }} state: stopped + enabled: no when: ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - rgw_group_name in group_names + systemd_unit_files.stdout != "0" - - name: stop ceph nfss with systemd + - name: stop ceph rgws + shell: "service ceph-radosgw status ; if [ $? 
== 0 ] ; then service ceph-radosgw stop ; else echo ; fi" + when: + ansible_os_family == 'RedHat' + +# Ubuntu 14.04 + - name: stop ceph rgws on ubuntu + command: initctl stop radosgw cluster={{ cluster }} id={{ ansible_hostname }} + failed_when: false + when: + ansible_distribution == 'Ubuntu' + + +- name: purge ceph rbd-mirror cluster + + vars: + rbdmirror_group_name: rbdmirrors + + hosts: + - "{{ rbdmirror_group_name }}" + + gather_facts: false # Already gathered previously + + become: true + + tasks: + - include_vars: ../roles/ceph-common/defaults/main.yml + - include_vars: ../roles/ceph-rbd-mirror/defaults/main.yml + - include_vars: ../group_vars/all + failed_when: false + - include_vars: ../group_vars/rbd-mirrors + failed_when: false + + - name: stop ceph.target with systemd service: - name: nfs-ganesha + name: ceph.target state: stopped + enabled: no when: ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - nfs_group_name in group_names + systemd_unit_files.stdout != "0" - name: stop ceph rbd mirror with systemd service: @@ -178,94 +195,155 @@ state: stopped when: ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - rbdmirror_group_name in group_names - -# before infernalis release, using sysvinit scripts -# we use this test so we do not have to know which RPM contains the boot script -# or where it is placed. + systemd_unit_files.stdout != "0" - - name: stop ceph osds - shell: "service ceph status osd ; if [ $? == 0 ] ; then service ceph stop osd ; else echo ; fi" +# Ubuntu 14.04 + - name: stop ceph rbd mirror on ubuntu + command: initctl stop ceph-rbd-mirorr cluster={{ cluster }} id=admin + failed_when: false when: - ansible_os_family == 'RedHat' and - osd_group_name in group_names + ansible_distribution == 'Ubuntu' - - name: stop ceph mons - shell: "service ceph status mon ; if [ $? == 0 ] ; then service ceph stop mon ; else echo ; fi" - when: - ansible_os_family == 'RedHat' and - mon_group_name in group_names - - name: stop ceph mdss - shell: "service ceph status mds ; if [ $? == 0 ] ; then service ceph stop mds ; else echo ; fi" +- name: purge ceph nfs cluster + + vars: + nfs_group_name: nfss + + hosts: + - "{{ nfs_group_name }}" + + gather_facts: false # Already gathered previously + + become: true + + tasks: + - include_vars: ../roles/ceph-common/defaults/main.yml + - include_vars: ../roles/ceph-nfs/defaults/main.yml + - include_vars: ../group_vars/all + failed_when: false + - include_vars: ../group_vars/nfss + failed_when: false + + - name: stop ceph.target with systemd + service: + name: ceph.target + state: stopped + enabled: no when: ansible_os_family == 'RedHat' and - mds_group_name in group_names + systemd_unit_files.stdout != "0" - - name: stop ceph rgws - shell: "service ceph-radosgw status ; if [ $? == 0 ] ; then service ceph-radosgw stop ; else echo ; fi" + - name: stop ceph nfss with systemd + service: + name: nfs-ganesha + state: stopped when: ansible_os_family == 'RedHat' and - rgw_group_name in group_names + systemd_unit_files.stdout != "0" - name: stop ceph nfss shell: "service nfs-ganesha status ; if [ $? 
== 0 ] ; then service nfs-ganesha stop ; else echo ; fi" when: - ansible_os_family == 'RedHat' and - nfs_group_name in group_names + ansible_os_family == 'RedHat' # Ubuntu 14.04 - - name: stop ceph osds on ubuntu - shell: | - for id in $(ls /var/lib/ceph/osd/ |grep -oh '[0-9]*'); do - initctl stop ceph-osd cluster={{ cluster }} id=$id - done + - name: stop ceph nfss on ubuntu + command: initctl stop nfs-ganesha failed_when: false when: - ansible_distribution == 'Ubuntu' and - osd_group_name in group_names - with_items: "{{ osd_ids.stdout_lines }}" + ansible_distribution == 'Ubuntu' - - name: stop ceph mons on ubuntu - command: initctl stop ceph-mon cluster={{ cluster }} id={{ monitor_name }} + +- name: purge ceph osd cluster + + vars: + osd_group_name: osds + +# When set to true and raw _multi_journal is used then block devices are also zapped + zap_block_devs: true + + hosts: + - "{{ osd_group_name }}" + + gather_facts: false # Already gathered previously + + become: true + + handlers: + - name: restart machine + shell: sleep 2 && shutdown -r now "Ansible updates triggered" + async: 1 + poll: 0 + ignore_errors: true + + - name: wait for server to boot + become: false + local_action: wait_for port=22 host={{ inventory_hostname }} state=started delay=10 timeout=500 + + - name: remove data + file: + path: /var/lib/ceph + state: absent + + tasks: + - include_vars: ../roles/ceph-common/defaults/main.yml + - include_vars: ../roles/ceph-osd/defaults/main.yml + - include_vars: ../group_vars/all failed_when: false + - include_vars: ../group_vars/osds + failed_when: false + + - name: check for a device list + fail: + msg: "OSD automatic discovery was detected, purge cluster does not support this scenario. If you want to purge the cluster, manually provide the list of devices in group_vars/osds using the devices variable." when: - ansible_distribution == 'Ubuntu' and - mon_group_name in group_names + devices is not defined and + osd_auto_discovery - - name: stop ceph mdss on ubuntu - command: initctl stop ceph-mds cluster={{ cluster }} id={{ mds_name }} - failed_when: false + - name: get osd numbers + shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | cut -d '-' -f 2 ; fi" + register: osd_ids + changed_when: false + + - name: stop ceph.target with systemd + service: + name: ceph.target + state: stopped + enabled: no when: - ansible_distribution == 'Ubuntu' and - mds_group_name in group_names + ansible_os_family == 'RedHat' and + systemd_unit_files.stdout != "0" - - name: stop ceph rgws on ubuntu - command: initctl stop radosgw cluster={{ cluster }} id={{ ansible_hostname }} - failed_when: false + - name: stop ceph-osd with systemd + service: + name: ceph-osd@{{item}} + state: stopped + enabled: no + with_items: "{{ osd_ids.stdout_lines }}" when: - ansible_distribution == 'Ubuntu' and - rgw_group_name in group_names + ansible_os_family == 'RedHat' and + systemd_unit_files.stdout != "0" - - name: stop ceph nfss on ubuntu - command: initctl stop nfs-ganesha - failed_when: false +# before infernalis release, using sysvinit scripts +# we use this test so we do not have to know which RPM contains the boot script +# or where it is placed. + + - name: stop ceph osds + shell: "service ceph status osd ; if [ $? 
== 0 ] ; then service ceph stop osd ; else echo ; fi" when: - ansible_distribution == 'Ubuntu' and - nfs_group_name in group_names + ansible_os_family == 'RedHat' - - name: stop ceph rbd mirror on ubuntu - command: initctl stop ceph-rbd-mirorr cluster={{ cluster }} id=admin +# Ubuntu 14.04 + - name: stop ceph osds on ubuntu + shell: | + for id in $(ls /var/lib/ceph/osd/ |grep -oh '[0-9]*'); do + initctl stop ceph-osd cluster={{ cluster }} id=$id + done failed_when: false when: - ansible_distribution == 'Ubuntu' and - rbdmirror_group_name in group_names - - - name: check for anything running ceph - shell: "ps awux | grep -- /usr/bin/[c]eph-" - register: check_for_running_ceph - failed_when: check_for_running_ceph.rc == 0 + ansible_distribution == 'Ubuntu' + with_items: "{{ osd_ids.stdout_lines }}" - name: see if ceph-disk-created data partitions are present shell: | @@ -291,20 +369,14 @@ shell: "(grep /var/lib/ceph/osd /proc/mounts || echo -n) | awk '{ print $2 }'" register: mounted_osd changed_when: false - when: - osd_group_name in group_names - name: drop all cache shell: "sync && sleep 1 && echo 3 > /proc/sys/vm/drop_caches" - when: - osd_group_name in group_names - name: umount osd data partition shell: umount {{ item }} with_items: - "{{ mounted_osd.stdout_lines }}" - when: - osd_group_name in group_names - name: remove osd mountpoint tree file: @@ -312,15 +384,6 @@ state: absent register: remove_osd_mountpoints ignore_errors: true - when: - osd_group_name in group_names - - - name: remove monitor store and bootstrap keys - file: - path: /var/lib/ceph/ - state: absent - when: - mon_group_name in group_names - name: is reboot needed local_action: shell echo requesting reboot @@ -330,7 +393,6 @@ - wait for server to boot - remove data when: - osd_group_name in group_names and remove_osd_mountpoints.failed is defined - name: see if ceph-disk is installed @@ -342,7 +404,6 @@ shell: ceph-disk zap "{{ item }}" with_items: "{{ devices | default([]) }}" when: - osd_group_name in group_names and ceph_disk_present.rc == 0 and ceph_data_partlabels.rc == 0 and zap_block_devs @@ -361,10 +422,123 @@ sgdisk --delete $partition_nb $raw_device with_items: "{{ceph_journal_partition_to_erase_path.stdout_lines}}" when: - osd_group_name in group_names and ceph_journal_partlabels.rc == 0 and zap_block_devs + +- name: purge ceph mon cluster + + vars: + mon_group_name: mons + + hosts: + - "{{ mon_group_name }}" + + gather_facts: false # Already gathered previously + + become: true + + tasks: + - include_vars: ../roles/ceph-common/defaults/main.yml + - include_vars: ../roles/ceph-mon/defaults/main.yml + - include_vars: ../roles/ceph-restapi/defaults/main.yml + - include_vars: ../group_vars/all + failed_when: false + - include_vars: ../group_vars/mons + failed_when: false + - include_vars: ../group_vars/restapis + failed_when: false + + - name: stop ceph.target with systemd + service: + name: ceph.target + state: stopped + enabled: no + when: + ansible_os_family == 'RedHat' and + systemd_unit_files.stdout != "0" + + - name: stop ceph mons with systemd + service: + name: ceph-mon@{{ ansible_hostname }} + state: stopped + enabled: no + when: + ansible_os_family == 'RedHat' and + systemd_unit_files.stdout != "0" + + - name: stop ceph mons + shell: "service ceph status mon ; if [ $? 
== 0 ] ; then service ceph stop mon ; else echo ; fi" + when: + ansible_os_family == 'RedHat' + + - name: stop ceph mons on ubuntu + command: initctl stop ceph-mon cluster={{ cluster }} id={{ ansible_hostname }} + failed_when: false + when: + ansible_distribution == 'Ubuntu' + + - name: remove monitor store and bootstrap keys + file: + path: /var/lib/ceph/ + state: absent + +- name: final cleanup - check any running ceph, purge ceph packages, purge config and remove data + + vars: + mon_group_name: mons + osd_group_name: osds + mds_group_name: mdss + rgw_group_name: rgws + rbdmirror_group_name: rbdmirrors + nfs_group_name: nfss + +# When set to true both groups of packages are purged. +# This can cause problem with qemu-kvm + purge_all_packages: true + + ceph_packages: + - ceph + - ceph-common + - ceph-fs-common + - ceph-fuse + - ceph-mds + - ceph-release + - ceph-radosgw + + ceph_remaining_packages: + - libcephfs1 + - librados2 + - libradosstriper1 + - librbd1 + - python-cephfs + - python-rados + - python-rbd + + hosts: + - "{{ mon_group_name }}" + - "{{ osd_group_name }}" + - "{{ mds_group_name }}" + - "{{ rgw_group_name }}" + - "{{ rbdmirror_group_name }}" + - "{{ nfs_group_name }}" + + gather_facts: false # Already gathered previously + + become: true + + handlers: + - name: remove data + file: + path: /var/lib/ceph + state: absent + + tasks: + - name: check for anything running ceph + shell: "ps awux | grep -- /usr/bin/[c]eph-" + register: check_for_running_ceph + failed_when: check_for_running_ceph.rc == 0 + - name: purge ceph packages with yum yum: name: "{{ item }}" @@ -474,3 +648,35 @@ state: absent when: ansible_os_family == 'RedHat' + + +- name: purge fetch directory + + hosts: + - localhost + + gather_facts: false + + tasks: + - include_vars: ../roles/ceph-common/defaults/main.yml + - include_vars: ../group_vars/all + failed_when: false + - include_vars: ../group_vars/mdss + failed_when: false + - include_vars: ../group_vars/rgws + failed_when: false + - include_vars: ../group_vars/rbd-mirrors + failed_when: false + - include_vars: ../group_vars/nfss + failed_when: false + - include_vars: ../group_vars/osds + failed_when: false + - include_vars: ../group_vars/mons + failed_when: false + - include_vars: ../group_vars/restapis + failed_when: false + + - name: purge fetch directory for localhost + file: + path: "{{ fetch_directory }}" + state: absent -- 2.39.5
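Review note: each of the new per-daemon plays added above follows the same skeleton, which can be hard to see across the hunks. The condensed sketch below is not part of the patch; it is the mds play from the diff with the ceph.target task omitted, shown only to make the shared shape visible: pull in role defaults plus optional group_vars, stop the daemon through systemd on RedHat when the first play found ceph unit files, fall back to the sysvinit script on older RedHat releases, and use upstart on Ubuntu 14.04.

- name: purge ceph mds cluster
  vars:
    mds_group_name: mdss
  hosts:
    - "{{ mds_group_name }}"
  gather_facts: false   # facts and the registered systemd_unit_files come from the first play
  become: true
  tasks:
    # role defaults first, then group_vars overrides; the group_vars files may be absent
    - include_vars: ../roles/ceph-common/defaults/main.yml
    - include_vars: ../roles/ceph-mds/defaults/main.yml
    - include_vars: ../group_vars/all
      failed_when: false
    - include_vars: ../group_vars/mdss
      failed_when: false

    # systemd path (RedHat with ceph unit files present)
    - name: stop ceph mdss with systemd
      service:
        name: ceph-mds@{{ ansible_hostname }}
        state: stopped
        enabled: no
      when:
        ansible_os_family == 'RedHat' and
        systemd_unit_files.stdout != "0"

    # sysvinit fallback (pre-Infernalis RedHat)
    - name: stop ceph mdss
      shell: "service ceph status mds ; if [ $? == 0 ] ; then service ceph stop mds ; else echo ; fi"
      when:
        ansible_os_family == 'RedHat'

    # upstart (Ubuntu 14.04)
    - name: stop ceph mdss on ubuntu
      command: initctl stop ceph-mds cluster={{ cluster }} id={{ ansible_hostname }}
      failed_when: false
      when:
        ansible_distribution == 'Ubuntu'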
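Usage note for anyone testing this rework (the inventory path and any extra flags are assumptions about your environment, not something the patch defines):

    ansible-playbook infrastructure-playbooks/purge-cluster.yml -e ireallymeanit=yes

Passing ireallymeanit as an extra var skips the confirmation prompt, because Ansible does not prompt for a vars_prompt variable that is already defined on the command line. Note the ordering the rework establishes: the per-daemon plays only stop services (and, for osds, unmount data partitions and optionally zap devices), while package purge and the final data removal happen in the "final cleanup" play, and the fetch directory on the Ansible host is removed by the very last play.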