git.apps.os.sepia.ceph.com Git - ceph-ansible.git/commitdiff
Add a ceph-volume aware shrink-osd playbook
author Noah Watkins <nwatkins@redhat.com>
Wed, 31 Oct 2018 18:17:16 +0000 (11:17 -0700)
committer Sébastien Han <seb@redhat.com>
Thu, 8 Nov 2018 16:45:37 +0000 (17:45 +0100)
Signed-off-by: Noah Watkins <nwatkins@redhat.com>
infrastructure-playbooks/shrink-osd.yml [new file with mode: 0644]

diff --git a/infrastructure-playbooks/shrink-osd.yml b/infrastructure-playbooks/shrink-osd.yml
new file mode 100644 (file)
index 0000000..e013528
--- /dev/null
@@ -0,0 +1,152 @@
+---
+# This playbook shrinks Ceph OSDs that have been created with ceph-volume.
+# It can remove any number of OSD(s) from the cluster and ALL THEIR DATA
+#
+# Use it like this:
+# ansible-playbook shrink-osd.yml -e osd_to_kill=0,2,6
+#     Prompts for confirmation to shrink the cluster. The default answer is
+#     'no', which leaves the cluster untouched; answering 'yes' shrinks it.
+#
+# ansible-playbook -e ireallymeanit=yes|no shrink-osd.yml
+#     Overrides the prompt using the -e option. Can be used in
+#     automation scripts to avoid the interactive prompt.
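+#
+# Both extra vars can be combined for a fully non-interactive run, e.g.:
+# ansible-playbook shrink-osd.yml -e ireallymeanit=yes -e osd_to_kill=0,2,6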
+
+- name: gather facts and check the init system
+
+  hosts:
+    - "{{ mon_group_name|default('mons') }}"
+    - "{{ osd_group_name|default('osds') }}"
+
+  become: True
+  tasks:
+    - debug: msg="gather facts on all Ceph hosts for later reference"
+
+- name: confirm whether user really meant to remove osd(s) from the cluster
+
+  hosts:
+    - localhost
+
+  become: true
+
+  vars_prompt:
+    - name: ireallymeanit
+      prompt: Are you sure you want to shrink the cluster?
+      default: 'no'
+      private: no
+
+  vars:
+    mon_group_name: mons
+    osd_group_name: osds
+
+  pre_tasks:
+    - name: exit playbook, if user did not mean to shrink cluster
+      fail:
+        msg: "Exiting shrink-osd playbook, no osd(s) was/were removed.
+           To shrink the cluster, either say 'yes' at the prompt or
+           use `-e ireallymeanit=yes` on the command line when
+           invoking the playbook"
+      when: ireallymeanit != 'yes'
+
+    - name: exit playbook, if no osd(s) was/were given
+      fail:
+        msg: "osd_to_kill must be declared.
+           Exiting shrink-osd playbook, no OSD(s) was/were removed.
+           On the command line when invoking the playbook, you can use
+           the -e osd_to_kill=0,1,2,3 argument."
+      when: osd_to_kill is not defined
+
+  roles:
+    - ceph-defaults
+
+  post_tasks:
+
+    - name: set_fact docker_exec_cmd, build docker exec command (containerized)
+      set_fact:
+        docker_exec_cmd: "docker exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}"
+      when: containerized_deployment
+
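+    # When containerized_deployment is true, the ceph commands below are prefixed
+    # so that they run inside the monitor container, e.g.:
+    #   docker exec ceph-mon-<mon hostname> ceph --cluster <cluster> health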
+    - name: exit playbook, if unable to connect to the cluster
+      command: "{{ docker_exec_cmd }} timeout 5 ceph --cluster {{ cluster }} health"
+      register: ceph_health
+      until: ceph_health.stdout.find("HEALTH") > -1
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      retries: 5
+      delay: 2
+
+    - name: find the host(s) where the osd(s) is/are running
+      command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} osd find {{ item }}"
+      with_items: "{{ osd_to_kill.split(',') }}"
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      register: find_osd_hosts
+
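+    # "ceph osd find <id>" returns JSON describing where the OSD lives; only the
+    # CRUSH host name is kept below, e.g. roughly:
+    #   { ..., "crush_location": { "host": "<hostname>" } }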
+    - name: set_fact osd_hosts
+      set_fact:
+        osd_hosts: "{{ osd_hosts | default([]) + [ (item.stdout | from_json).crush_location.host ] }}"
+      with_items: "{{ find_osd_hosts.results }}"
+
+    - name: find lvm osd volumes on each host
+      ceph_volume:
+        action: "list"
+      environment:
+        CEPH_VOLUME_DEBUG: 1
+        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
+      with_items: "{{ osd_hosts }}"
+      delegate_to: "{{ item }}"
+      register: osd_volumes
+
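+    # The listing above is JSON keyed by OSD id, with one entry per logical
+    # volume/device of that OSD, each carrying a "path" key, e.g. roughly:
+    #   { "0": [ { "path": "/dev/...", ... } ], "2": [ ... ] }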
+    - name: filter osd volumes to kill by osd - non container
+      set_fact:
+        osd_volumes_to_kill_non_container: "{{ osd_volumes_to_kill_non_container | default([]) + [ (item.1.stdout|from_json)[item.0] ] }}"
+      with_together:
+        - "{{ osd_to_kill.split(',') }}"
+        - "{{ osd_volumes.results }}"
+
+    - name: generate (host / volume) pairs to zap - non container
+      set_fact:
+        osd_host_volumes_to_kill_non_container: "{%- set _val = namespace(devs=[]) -%}
+        {%- for host in osd_hosts -%}
+        {%- for dev in osd_volumes_to_kill_non_container[loop.index-1] -%}
+        {%- set _val.devs = _val.devs + [{\"host\": host, \"path\": dev.path}] -%}
+        {%- endfor -%}
+        {%- endfor -%}
+        {{ _val.devs }}"
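+    # The resulting list pairs each host with its device paths, e.g. roughly:
+    #   [ { "host": "<hostname>", "path": "/dev/..." }, ... ]
+    # and is what the zap task below iterates over.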
+
+    - name: mark osd(s) out of the cluster
+      command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} osd out {{ item }}"
+      run_once: true
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      with_items: "{{ osd_to_kill.split(',') }}"
+
+    - name: stop osd service(s)
+      service:
+        name: ceph-osd@{{ item.0 }}
+        state: stopped
+        enabled: no
+      with_together:
+        - "{{ osd_to_kill.split(',') }}"
+        - "{{ osd_hosts }}"
+      delegate_to: "{{ item.1 }}"
+
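+    # Zapping wipes the data on each OSD device/logical volume so it can be
+    # reused; this is the destructive step that removes ALL the OSD's data.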
+    - name: zap osd devices
+      ceph_volume:
+        action: "zap"
+        data: "{{ item.path }}"
+      environment:
+        CEPH_VOLUME_DEBUG: 1
+        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
+      delegate_to: "{{ item.host }}"
+      with_items: "{{ osd_host_volumes_to_kill_non_container }}"
+
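+    # "ceph osd purge" removes the OSD from the CRUSH map and deletes its auth
+    # key and OSD entry in a single command.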
+    - name: purge osd(s) from the cluster
+      command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} osd purge {{ item }} --yes-i-really-mean-it"
+      run_once: true
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      with_items: "{{ osd_to_kill.split(',') }}"
+
+    - name: show ceph health
+      command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s"
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+
+    - name: show ceph osd tree
+      command: "{{ docker_exec_cmd }} ceph --cluster {{ cluster }} osd tree"
+      delegate_to: "{{ groups[mon_group_name][0] }}"