From b5abe8740cf5a4a234651b588ba24238983bb32e Mon Sep 17 00:00:00 2001
From: Guillaume Abrioux
Date: Wed, 13 Dec 2017 15:24:33 +0100
Subject: [PATCH] purge-cluster: wipe disk using dd
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The `bluestore_purge_osd_non_container` scenario is failing because old
osd_uuid information is kept on the devices, which causes `ceph-disk activate`
to fail when trying to redeploy a new cluster after a purge.

Typical error seen:

```
2017-12-13 14:29:48.021288 7f6620651d00 -1 bluestore(/var/lib/ceph/tmp/mnt.2_3gh6/block) _check_or_set_bdev_label bdev /var/lib/ceph/tmp/mnt.2_3gh6/block fsid 770080e2-20db-450f-bc17-81b55f167982 does not match our fsid f33efff0-2f07-4203-ad8d-8a0844d6bda0
```

Signed-off-by: Guillaume Abrioux
(cherry picked from commit eeedefdf0207f04e67af490e03d895324ab609a1)
Signed-off-by: Sébastien Han
---
 infrastructure-playbooks/purge-cluster.yml | 117 +++++----------------
 1 file changed, 24 insertions(+), 93 deletions(-)

diff --git a/infrastructure-playbooks/purge-cluster.yml b/infrastructure-playbooks/purge-cluster.yml
index c35e4d6ce..ffa6daddc 100644
--- a/infrastructure-playbooks/purge-cluster.yml
+++ b/infrastructure-playbooks/purge-cluster.yml
@@ -262,65 +262,6 @@
       - /usr/lib/udev/rules.d/95-ceph-osd.rules
       - /usr/lib/udev/rules.d/60-ceph-by-parttypeuuid.rules
 
-  - name: see if ceph-disk-created data partitions are present
-    shell: |
-      ls /dev/disk/by-partlabel | grep -q "ceph.*.data"
-    failed_when: false
-    register: ceph_data_partlabels
-
-  - name: see if ceph-disk-created block partitions are present
-    shell: |
-      ls /dev/disk/by-partlabel | grep -q "ceph.*block$"
-    failed_when: false
-    register: ceph_block_partlabels
-
-  - name: see if ceph-disk-created journal partitions are present
-    shell: |
-      ls /dev/disk/by-partlabel | grep -q "ceph.*.journal"
-    failed_when: false
-    register: ceph_journal_partlabels
-
-  - name: see if ceph-disk-created block db partitions are present
-    shell: |
-      ls /dev/disk/by-partlabel | grep -q "ceph.*.block.db"
-    failed_when: false
-    register: ceph_db_partlabels
-
-  - name: see if ceph-disk-created block wal partitions are present
-    shell: |
-      ls /dev/disk/by-partlabel | grep -q "ceph.*.block.wal"
-    failed_when: false
-    register: ceph_wal_partlabels
-
-  - name: see if ceph-disk-created lockbox partitions are present
-    shell: |
-      ls /dev/disk/by-partlabel | grep -q "ceph.*.lockbox"
-    failed_when: false
-    register: ceph_lockbox_partlabels
-
-# Initial attempt, doing everything in Ansible...
-# - name: see if encrypted partitions are present
-#   shell: blkid -t TYPE=crypto_LUKS -o value -s PARTUUID
-#   register: encrypted_partuuid
-#
-# - name: find if these encrypted partitions are ceph data partitions
-#   shell: blkid -t PARTLABEL="ceph data" -o value -s PARTUUID $(blkid -U {{ item }})
-#   failed_when: false
-#   with_items: "{{ encrypted_partuuid.stdout_lines }}"
-#   when: "{{ encrypted_partuuid | length > 0 }}"
-#   register: encrypted_partuuid_ceph_data
-#
-# - name: find if these encrypted partitions are ceph journal partitions
-#   shell: blkid -t PARTLABEL="ceph journal" -o value -s PARTUUID $(blkid -U {{ item }})
-#   failed_when: false
-#   with_items: "{{ encrypted_partuuid.stdout_lines }}"
-#   when: "{{ encrypted_partuuid | length > 0 }}"
-#   register: encrypted_partuuid_ceph_journal
-#
-# - name: merge the list of ceph encrypted partitions
-#   set_fact:
-#     encrypted_partuuid_ceph: "{{ encrypted_partuuid_ceph_data + encrypted_partuuid_ceph_journal }}"
-
 # NOTE(leseb): hope someone will find a more elegant way one day...
   - name: see if encrypted partitions are present
     shell: |
@@ -370,35 +311,15 @@
   - name: get ceph data partitions
     shell: |
       blkid | awk -F: '/ceph data/ { print $1 }'
-    when: ceph_data_partlabels.rc == 0
     failed_when: false
     register: ceph_data_partition_to_erase_path
 
   - name: get ceph lockbox partitions
     shell: |
       blkid | awk '/ceph lockbox/ { sub (":", "", $1); print $1 }'
-    when: ceph_lockbox_partlabels.rc == 0
     failed_when: false
     register: ceph_lockbox_partition_to_erase_path
 
-  - name: zap osd disks
-    shell: |
-      if (echo "{{ item }}" | grep -Esq '[0-9]{1,2}$'); then
-        raw_device=$(echo "{{ item }}" | grep -Eo '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]|nvme[0-9]n[0-9]){1,2}')
-        partition_nb=$(echo "{{ item }}" | grep -Eo '[0-9]{1,2}$')
-        sgdisk --delete $partition_nb $raw_device
-        udevadm settle --timeout=600
-      else
-        ceph-disk zap "{{ item }}"
-        udevadm settle --timeout=600
-      fi
-    with_items:
-      - "{{ ceph_data_partition_to_erase_path.stdout_lines | default([]) }}"
-      - "{{ ceph_lockbox_partition_to_erase_path.stdout_lines | default([]) }}"
-    when:
-      - ceph_disk_present.rc == 0
-      - (ceph_data_partlabels.rc == 0 or ceph_lockbox_partlabels.rc == 0)
-
   # this should go away once 'ceph-volume lvm zap' is available
   - name: remove osd logical volumes
     command: "lvremove -f {{ item.data_vg }}/{{ item.data }}"
@@ -420,50 +341,60 @@
   - name: get ceph block partitions
     shell: |
       blkid | awk '/ceph block"/ { sub (":", "", $1); print $1 }'
-    when: ceph_block_partlabels.rc == 0
     failed_when: false
     register: ceph_block_partition_to_erase_path
 
   - name: get ceph journal partitions
     shell: |
       blkid | awk '/ceph journal/ { sub (":", "", $1); print $1 }'
-    when: ceph_journal_partlabels.rc == 0
     failed_when: false
     register: ceph_journal_partition_to_erase_path
 
   - name: get ceph db partitions
     shell: |
       blkid | awk '/ceph block.db/ { sub (":", "", $1); print $1 }'
-    when: ceph_db_partlabels.rc == 0
     failed_when: false
     register: ceph_db_partition_to_erase_path
 
   - name: get ceph wal partitions
     shell: |
       blkid | awk '/ceph block.wal/ { sub (":", "", $1); print $1 }'
-    when: ceph_wal_partlabels.rc == 0
     failed_when: false
     register: ceph_wal_partition_to_erase_path
 
+  - name: set_fact combined_devices_list
+    set_fact:
+      combined_devices_list: "{{ ceph_data_partition_to_erase_path.stdout_lines +
+                                 ceph_lockbox_partition_to_erase_path.stdout_lines +
+                                 ceph_block_partition_to_erase_path.stdout_lines +
+                                 ceph_journal_partition_to_erase_path.stdout_lines +
+                                 ceph_db_partition_to_erase_path.stdout_lines +
+                                 ceph_wal_partition_to_erase_path.stdout_lines }}"
+
+  - name: resolve parent device
+    shell: echo /dev/$(lsblk -no pkname "{{ item }}")
+    register: tmp_resolved_parent_device
+    with_items:
+      - "{{ combined_devices_list }}"
+
+  - name: set_fact resolved_parent_device
+    set_fact:
+      resolved_parent_device: "{{ tmp_resolved_parent_device.results | map(attribute='stdout') | list | unique }}"
+
   - name: zap ceph journal/block db/block wal partitions
     shell: |
       # if the disk passed is a raw device AND the boot system disk
-      if echo "{{ item }}" | egrep -sq '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]p|nvme[0-9]n[0-9]p){1,2}$' && parted -s $(echo "{{ item }}" | egrep -o '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]p|nvme[0-9]n[0-9]p){1,2}') print | grep -sq boot; then
+      if parted -s "{{ item }}" print | grep -sq boot; then
         echo "Looks like {{ item }} has a boot partition,"
         echo "if you want to delete specific partitions point to the partition instead of the raw device"
         echo "Do not use your system disk!"
         exit 1
       fi
-      raw_device=$(echo "{{ item }}" | egrep -o '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]|nvme[0-9]n[0-9]){1,2}')
-      partition_nb=$(echo "{{ item }}" | egrep -o '[0-9]{1,2}$')
-      sgdisk --delete $partition_nb $raw_device
+      sgdisk -Z "{{ item }}"
+      dd if=/dev/zero of="{{ item }}" bs=1M count=200
+      udevadm settle --timeout=600
     with_items:
-      - "{{ ceph_block_partition_to_erase_path.stdout_lines | default([]) }}"
-      - "{{ ceph_journal_partition_to_erase_path.stdout_lines | default([]) }}"
-      - "{{ ceph_db_partition_to_erase_path.stdout_lines | default([]) }}"
-      - "{{ ceph_wal_partition_to_erase_path.stdout_lines | default([]) }}"
-    when:
-      - (ceph_block_partlabels.rc == 0 or ceph_journal_partlabels.rc == 0 or ceph_db_partlabels.rc == 0 or ceph_wal_partlabels.rc == 0)
+      - "{{ resolved_parent_device }}"
 
 - name: purge ceph mon cluster
 
-- 
2.39.5
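
For anyone wanting to reproduce the cleanup by hand, the sketch below mirrors what the new zap task does on each resolved parent device. It is illustrative only: `/dev/sdb` is an assumed example device, not something taken from the patch, and the commands are destructive.

```
#!/bin/sh
# Rough manual equivalent of the zap task added by this patch -- illustrative sketch only.
# DEV is an assumption for the example; point it at a disk previously used by an OSD,
# never at your system disk.
DEV=/dev/sdb

# Same safety check as the task: refuse to touch a disk that carries a boot partition.
if parted -s "$DEV" print | grep -sq boot; then
    echo "$DEV appears to hold a boot partition, aborting" >&2
    exit 1
fi

sgdisk -Z "$DEV"                           # zap the GPT and MBR data structures
dd if=/dev/zero of="$DEV" bs=1M count=200  # zero the first 200 MiB of the device
udevadm settle --timeout=600               # wait for udev to process the device events
```

Zeroing the start of the device is what discards the stale bluestore bdev label (the old osd fsid), which is what produced the "does not match our fsid" error above during `ceph-disk activate`.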