--- /dev/null
+---
+# This playbook shrinks Ceph OSDs.
+# It can remove any number of OSD(s) from the cluster and ALL THEIR DATA
+#
+# Use it like this:
+# ansible-playbook shrink-osd.yml -e osd_to_kill=0,2,6
+# Prompts for confirmation to shrink the cluster. The prompt defaults
+# to 'no' and leaves the cluster untouched; answering 'yes' shrinks it.
+#
+# ansible-playbook -e ireallymeanit=yes|no shrink-osd.yml
+# Overrides the prompt using the -e option. Useful in
+# automation scripts to avoid the interactive prompt.
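+#
+# For example, a fully non-interactive run (hypothetical OSD ids) could be:
+# ansible-playbook shrink-osd.yml -e ireallymeanit=yes -e osd_to_kill=0,2,6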
+
+- name: gather facts and check the init system
+
+ hosts:
+ - "{{ mon_group_name|default('mons') }}"
+ - "{{ osd_group_name|default('osds') }}"
+
+ become: True
+ tasks:
+ - debug: msg="gather facts on all Ceph hosts for following reference"
+
+- name: confirm whether user really meant to remove osd(s) from the cluster
+
+ hosts:
+ - localhost
+
+ become: true
+
+ vars_prompt:
+ - name: ireallymeanit
+ prompt: Are you sure you want to shrink the cluster?
+ default: 'no'
+ private: no
+
+ vars:
+ mon_group_name: mons
+ osd_group_name: osds
+
+ pre_tasks:
+ - name: exit playbook, if user did not mean to shrink cluster
+ fail:
+ msg: "Exiting shrink-osd playbook, no osd(s) was/were removed..
+ To shrink the cluster, either say 'yes' on the prompt or
+ or use `-e ireallymeanit=yes` on the command line when
+ invoking the playbook"
+ when: ireallymeanit != 'yes'
+
+ - name: exit playbook, if no osd(s) was/were given
+ fail:
+ msg: "osd_to_kill must be declared
+ Exiting shrink-osd playbook, no OSD(s) was/were removed.
+ On the command line when invoking the playbook, you can use
+ -e osd_to_kill=0,1,2,3 argument."
+ when: osd_to_kill is not defined
+
+ roles:
+ - ceph-defaults
+ - ceph-facts
+
+ post_tasks:
+
+ - name: set_fact docker_exec_cmd, build docker exec command (containerized)
+ set_fact:
+ docker_exec_cmd: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}"
+ when: containerized_deployment
+
+ - name: exit playbook, if unable to connect to the cluster
+ command: "{{ docker_exec_cmd }} timeout 5 ceph --cluster {{ cluster }} health"
+ register: ceph_health
+ until: ceph_health.stdout.find("HEALTH") > -1
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+ retries: 5
+ delay: 2
+
+ - name: find the host(s) where the osd(s) is/are running
+ command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} osd find {{ item }}"
+ with_items: "{{ osd_to_kill.split(',') }}"
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+ register: find_osd_hosts
+
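+ # 'ceph osd find <id>' prints JSON similar to
+ # {"osd": 0, "ip": "...", "crush_location": {"host": "osd-node-1", ...}}
+ # (field values are illustrative); only crush_location.host is kept below.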
+ - name: set_fact osd_hosts
+ set_fact:
+ osd_hosts: "{{ osd_hosts | default([]) + [ (item.stdout | from_json).crush_location.host ] }}"
+ with_items: "{{ find_osd_hosts.results }}"
+
+ - name: check if ceph admin key exists on the osd nodes
+ stat:
+ path: "/etc/ceph/{{ cluster }}.client.admin.keyring"
+ register: ceph_admin_key
+ with_items: "{{ osd_hosts }}"
+ delegate_to: "{{ item }}"
+ failed_when: false
+ when:
+ - not containerized_deployment
+
+ - name: fail when admin key is not present
+ fail:
+ msg: "The Ceph admin key is not present on the OSD node, please add it and remove it after the playbook is done."
+ with_items: "{{ ceph_admin_key.results }}"
+ when:
+ - not containerized_deployment
+ - not item.stat.exists
+
+ # NOTE(leseb): using '>' is the only way I could get the command working
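+ # A 'ceph-disk list' data line typically looks like (illustrative):
+ #   /dev/sda1 ceph data, active, cluster ceph, osd.0, journal /dev/sdb1
+ # so the awk below prints the partition that backs the requested osd id.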
+ - name: find osd device based on the id
+ shell: >
+ docker run --privileged=true -v /dev:/dev --entrypoint /usr/sbin/ceph-disk
+ {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
+ list | awk -v pattern=osd.{{ item.0 }} '$0 ~ pattern {print $1}'
+ with_together:
+ - "{{ osd_to_kill.split(',') }}"
+ - "{{ osd_hosts }}"
+ register: osd_to_kill_disks
+ delegate_to: "{{ item.1 }}"
+ when:
+ - containerized_deployment
+
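+ # Dedicated devices hold journal / block.db / block.wal partitions on a
+ # separate disk from the OSD data; the grep below extracts paths such as
+ # /dev/sdb1 or /dev/nvme0n1p2 (examples matching the regex used here).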
+ - name: find osd dedicated devices - container
+ shell: >
+ docker run --privileged=true -v /dev:/dev --entrypoint /usr/sbin/ceph-disk
+ {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
+ list | grep osd.{{ item.0 }} | grep -Eo '/dev/([hsv]d[a-z]{1,2})[0-9]{1,2}|/dev/nvme[0-9]n[0-9]p[0-9]'
+ with_together:
+ - "{{ osd_to_kill.split(',') }}"
+ - "{{ osd_hosts }}"
+ register: osd_to_kill_disks_dedicated
+ delegate_to: "{{ item.1 }}"
+ when:
+ - containerized_deployment
+
+ - name: find osd dedicated devices - non container
+ shell: ceph-disk list | grep osd.{{ item.0 }} | grep -Eo '/dev/([hsv]d[a-z]{1,2})[0-9]{1,2}|/dev/nvme[0-9]n[0-9]p[0-9]'
+ with_together:
+ - "{{ osd_to_kill.split(',') }}"
+ - "{{ osd_hosts }}"
+ register: osd_to_kill_disks_dedicated_non_container
+ delegate_to: "{{ item.1 }}"
+ when:
+ - not containerized_deployment
+
+ # if nvme then osd_to_kill_disks is e.g. /dev/nvme0n1p1, we need nvme0n1
+ # if ssd or hdd then osd_to_kill_disks is e.g. /dev/sda1, we need sda
+ - name: stop osd services (container)
+ service:
+ name: "ceph-osd@{{ item.0.stdout[:-2] | regex_replace('/dev/', '') if 'nvme' in item.0.stdout else item.0.stdout[:-1] | regex_replace('/dev/', '') }}"
+ state: stopped
+ enabled: no
+ with_together:
+ - "{{ osd_to_kill_disks.results }}"
+ - "{{ osd_hosts }}"
+ delegate_to: "{{ item.1 }}"
+ when:
+ - containerized_deployment
+
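+ # lsblk resolves a partition to its parent disk, e.g. /dev/sda1 -> sda or
+ # /dev/nvme0n1p1 -> nvme0n1; the parent name is then passed as OSD_DEVICE
+ # to the zap container below.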
+ - name: resolve parent device
+ command: lsblk --nodeps -no pkname "{{ item.0.stdout }}"
+ register: resolved_parent_device
+ delegate_to: "{{ item.1 }}"
+ with_together:
+ - "{{ osd_to_kill_disks.results }}"
+ - "{{ osd_hosts }}"
+ when:
+ - containerized_deployment
+
+ - name: zap ceph osd disks
+ shell: |
+ docker run --rm \
+ --privileged=true \
+ --name ceph-osd-zap-{{ hostvars[item.1]['ansible_hostname'] }}-{{ item.0.stdout }} \
+ -v /dev/:/dev/ \
+ -e OSD_DEVICE=/dev/{{ item.0.stdout }} \
+ {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} \
+ zap_device
+ delegate_to: "{{ item.1 }}"
+ with_together:
+ - "{{ resolved_parent_device.results }}"
+ - "{{ osd_hosts }}"
+ when:
+ - containerized_deployment
+
+ - name: zap ceph osd partitions from dedicated devices
+ shell: |
+ for osd in {{ ' '.join(item.1.stdout_lines) }}
+ do
+ if [ -b "${osd}" ];
+ then
+ pkname=$(lsblk --nodeps -no PKNAME "${osd}");
+ echo zapping ceph osd partitions "${osd}";
+ docker run --rm \
+ --privileged=true \
+ --name ceph-osd-zap-{{ hostvars[item.0]['ansible_hostname'] }}-$(basename "${osd}") \
+ -v /dev/:/dev/ \
+ -e OSD_DEVICE="${osd}" \
+ {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} \
+ zap_device;
+ partprobe /dev/"${pkname}";
+ fi
+ done
+ with_together:
+ - "{{ osd_hosts }}"
+ - "{{ osd_to_kill_disks_dedicated.results }}"
+ delegate_to: "{{ item.0 }}"
+ when:
+ - containerized_deployment
+ - item.1 | length > 0
+
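+ # Non-containerized removal is a two-step ceph-disk operation:
+ # 'deactivate --mark-out' stops the OSD daemon and marks the OSD out,
+ # 'destroy --zap' removes it from the cluster and zaps its data disk.
+ # The tasks further below repeat the cluster-side cleanup (crush remove,
+ # auth del, osd rm) in case 'ceph-disk destroy' fails.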
+ - name: deactivating osd(s)
+ command: ceph-disk deactivate --cluster {{ cluster }} --deactivate-by-id {{ item.0 }} --mark-out
+ run_once: true
+ with_together:
+ - "{{ osd_to_kill.split(',') }}"
+ - "{{ osd_hosts }}"
+ delegate_to: "{{ item.1 }}"
+ when:
+ - not containerized_deployment
+
+ - name: destroying osd(s)
+ command: ceph-disk destroy --cluster {{ cluster }} --destroy-by-id {{ item.0 }} --zap
+ run_once: true
+ with_together:
+ - "{{ osd_to_kill.split(',') }}"
+ - "{{ osd_hosts }}"
+ delegate_to: "{{ item.1 }}"
+ when:
+ - not containerized_deployment
+
+ - name: remove osd(s) from the CRUSH map when ceph-disk destroy fails
+ command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} osd crush remove osd.{{ item }}"
+ run_once: true
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+ with_items: "{{ osd_to_kill.split(',') }}"
+
+ - name: delete osd(s) auth key when ceph-disk destroy fails
+ command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} auth del osd.{{ item }}"
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+ with_items: "{{ osd_to_kill.split(',') }}"
+
+ - name: deallocate osd(s) id when ceph-disk destroy fails
+ command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} osd rm {{ item }}"
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+ with_items: "{{ osd_to_kill.split(',') }}"
+
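+ # Dedicated journal / block.db / block.wal partitions are cleaned up
+ # explicitly: wipe signatures (wipefs), overwrite the first 10 MB (dd),
+ # delete the partition from its parent device (sgdisk) and re-read the
+ # partition table (partprobe).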
+ - name: zap dedicated partitions
+ shell: |
+ for osd in {{ ' '.join(item.1.stdout_lines) }}
+ do
+ if [ -b "${osd}" ];
+ then
+ echo zapping dedicated partition "${osd}"
+ pkname=$(lsblk --nodeps -no PKNAME "${osd}")
+ wipefs --all "${osd}"
+ dd if=/dev/zero of="${osd}" bs=1M count=10
+ partition_nb=$(echo "${osd}" | grep -oE '[0-9]{1,2}$')
+ sgdisk --delete $partition_nb /dev/"${pkname}"
+ partprobe /dev/"${pkname}"
+ fi
+ done
+ with_together:
+ - "{{ osd_hosts }}"
+ - "{{ osd_to_kill_disks_dedicated_non_container.results }}"
+ delegate_to: "{{ item.0 }}"
+ when:
+ - not containerized_deployment
+ - item.1 | length > 0
+
+ - name: show ceph health
+ command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s"
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+
+ - name: show ceph osd tree
+ command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} osd tree"
+ delegate_to: "{{ groups[mon_group_name][0] }}"