From 235b1fccc6b4e1c9c40fa44cb71e995703d84a49 Mon Sep 17 00:00:00 2001
From: Rishabh Dave
Date: Thu, 20 Jun 2019 15:29:25 +0530
Subject: [PATCH] add a playbook that removes mds from a node

Add a playbook, named "shrink-mds.yml", in infrastructure-playbooks/
that removes a MDS from a node in an already deployed Ceph cluster.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1677431
Signed-off-by: Rishabh Dave
---
 infrastructure-playbooks/shrink-mds.yml | 109 ++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 infrastructure-playbooks/shrink-mds.yml

diff --git a/infrastructure-playbooks/shrink-mds.yml b/infrastructure-playbooks/shrink-mds.yml
new file mode 100644
index 000000000..947f88cbf
--- /dev/null
+++ b/infrastructure-playbooks/shrink-mds.yml
@@ -0,0 +1,109 @@
+---
+# This playbook removes the Ceph MDS from your cluster.
+#
+# Use it like this:
+# ansible-playbook shrink-mds.yml -e mds_to_kill=ceph-mds01
+#     Prompts for confirmation to shrink, defaults to no and
+#     doesn't shrink the cluster. yes shrinks the cluster.
+#
+# ansible-playbook -e ireallymeanit=yes|no shrink-mds.yml
+#     Overrides the prompt using -e option. Can be used in
+#     automation scripts to avoid interactive prompt.
+
+- name: gather facts and check the init system
+  hosts:
+    - "{{ mon_group_name | default('mons') }}"
+    - "{{ mds_group_name | default('mdss') }}"
+  become: true
+  tasks:
+    - debug:
+        msg: gather facts on all Ceph hosts for following reference
+
+- name: perform checks, remove mds and print cluster health
+  hosts: localhost
+  become: true
+  vars_prompt:
+    - name: ireallymeanit
+      prompt: Are you sure you want to shrink the cluster?
+      default: 'no'
+      private: false
+  pre_tasks:
+    - import_role:
+        name: ceph-defaults
+
+    - name: exit playbook, if no mds was given
+      when: mds_to_kill is not defined
+      fail:
+        msg: >
+          mds_to_kill must be declared. Exiting shrink-mds playbook,
+          no MDS was removed. On the command line when invoking the
+          playbook, you can use "-e mds_to_kill=ceph-mds1" argument.
+          You can only remove a single MDS each time the playbook runs.
+
+    - name: exit playbook, if the mds is not part of the inventory
+      when: mds_to_kill not in groups[mds_group_name]
+      fail:
+        msg: "It seems that the host given is not part of your inventory,
+          please make sure it is."
+
+    - name: exit playbook, if user did not mean to shrink cluster
+      when: ireallymeanit != 'yes'
+      fail:
+        msg: "Exiting shrink-mds playbook, no mds was removed.
+          To shrink the cluster, either say 'yes' on the prompt or
+          use `-e ireallymeanit=yes` on the command line when
+          invoking the playbook"
+
+    - import_role:
+        name: ceph-facts
+
+    - name: set_fact container_exec_cmd for mon0
+      set_fact:
+        container_exec_cmd: >
+          {{ container_binary }} exec ceph-mon-{{ hostvars[groups
+          [mon_group_name][0]]['ansible_hostname'] }}
+      when: containerized_deployment | bool
+
+    - name: exit playbook, if can not connect to the cluster
+      command: >
+        {{ container_exec_cmd | default('') }} timeout 5 ceph --cluster
+        {{ cluster }} health
+      register: ceph_health
+      until: ceph_health is succeeded
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      retries: 5
+      delay: 2
+
+    - name: set_fact mds_to_kill_hostname
+      set_fact:
+        mds_to_kill_hostname: "{{ hostvars[mds_to_kill]['ansible_hostname'] }}"
+
+  tasks:
+    - name: stop mds service(s)
+      service:
+        name: ceph-mds@{{ mds_to_kill_hostname }}
+        state: stopped
+        enabled: false
+      delegate_to: "{{ mds_to_kill }}"
+      failed_when: false
+
+    - name: purge mds store
+      file:
+        path: /var/lib/ceph/mds/{{ cluster }}-{{ mds_to_kill_hostname }}
+        state: absent
+      delegate_to: "{{ mds_to_kill }}"
+
+  post_tasks:
+    - name: verify that the mds has stopped
+      shell: >
+        {{ container_exec_cmd | default('') }} ceph --cluster
+        {{ cluster }} fs dump | grep {{ mds_to_kill_hostname }}
+      register: result
+      failed_when: result.rc == 0
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+
+    - name: show ceph health
+      command: >
+        {{ container_exec_cmd | default('') }} ceph --cluster
+        {{ cluster }} -s
+      delegate_to: "{{ groups[mon_group_name][0] }}"
-- 
2.39.5