- name: set_fact container_exec_cmd for mon0
set_fact:
- container_exec_cmd: >
- {{ container_binary }} exec ceph-mon-{{ hostvars[groups
- [mon_group_name][0]]['ansible_hostname'] }}
+ # single-line quoted scalar: the folded (>) form embedded newlines/extra
+ # whitespace in the command prefix reused by the later ceph tasks
+ container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}"
when: containerized_deployment | bool
- name: exit playbook, if can not connect to the cluster
- command: >
- {{ container_exec_cmd | default('') }} timeout 5 ceph --cluster
- {{ cluster }} health
+ # 'timeout 5' keeps an unreachable cluster from hanging the probe;
+ # 'until' below retries it before aborting the playbook
+ command: "{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster {{ cluster }} health"
register: ceph_health
until: ceph_health is succeeded
delegate_to: "{{ groups[mon_group_name][0] }}"
mds_to_kill_hostname: "{{ hostvars[mds_to_kill]['ansible_hostname'] }}"
tasks:
- - name: stop mds service(s)
- service:
- name: ceph-mds@{{ mds_to_kill_hostname }}
- state: stopped
- enabled: no
- delegate_to: "{{ mds_to_kill }}"
- failed_when: false
+ # get rid of this as soon as "systemctl stop ceph-mds@$HOSTNAME" also
+ # removes the MDS from the FS map.
+ - name: exit mds if the deployment is containerized
+ when: containerized_deployment | bool
+ # the mds daemon id is the short hostname (it matches the ceph-mds@ unit
+ # name used below), not the inventory name held in mds_to_kill
+ command: "{{ container_exec_cmd | default('') }} ceph tell mds.{{ mds_to_kill_hostname }} exit"
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+
+ - name: stop mds service and verify it
+ block:
+ - name: stop mds service
+ service:
+ name: ceph-mds@{{ mds_to_kill_hostname }}
+ state: stopped
+ enabled: no
+ delegate_to: "{{ mds_to_kill }}"
+ # best effort: the unit may already be stopped or absent; the follow-up
+ # "ensure that the mds is stopped" task performs the real verification
+ failed_when: false
+
+ - name: ensure that the mds is stopped
+ # the unit is ceph-mds@ (hyphen), matching the service stopped above
+ command: "systemctl is-active ceph-mds@{{ mds_to_kill_hostname }}"
+ register: mds_to_kill_status
+ # systemctl is-active exits 0 while the unit is still active
+ failed_when: mds_to_kill_status.rc == 0
+ # 'retries' only takes effect when 'until' is set; poll until inactive
+ until: mds_to_kill_status.rc != 0
+ delegate_to: "{{ mds_to_kill }}"
+ retries: 5
+ delay: 2
+
+ - name: fail if the mds is reported as active or standby
+ block:
+ - name: get ceph status
+ command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s -f json"
+ register: ceph_status
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+
+ # collect the daemon names of all active mds ranks from the fsmap
+ - name: get active mds nodes list
+ set_fact:
+ active_mdss: "{{ active_mdss | default([]) + [item.name] }}"
+ with_items: "{{ (ceph_status.stdout | from_json)['fsmap']['by_rank'] }}"
+
+ # fs dump is fetched separately because the standby daemons are read
+ # from its 'standbys' section (see the next set_fact)
+ - name: get ceph fs dump status
+ command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
+ register: ceph_fs_status
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+
+ - name: create a list of standby mdss
+ set_fact:
+ # without "{{ }}" delimiters the fact would be the literal template
+ # string, never a list, and the later membership check could not match
+ standby_mdss: "{{ (ceph_fs_status.stdout | from_json)['standbys'] | map(attribute='name') | list }}"
+
+ - name: fail if mds just killed is being reported as active or standby
+ fail:
+ msg: "mds node {{ mds_to_kill }} still up and running."
+ when:
+ # compare short hostnames: active_mdss/standby_mdss hold daemon names
+ # (ansible_hostname), while mds_to_kill is the inventory name
+ - (mds_to_kill_hostname in active_mdss | default([])) or
+ (mds_to_kill_hostname in standby_mdss | default([]))
+
+ # NOTE(review): no 'when' guard is visible on this task -- confirm it only
+ # runs when the mds just removed was the last one serving the filesystem
+ - name: delete the filesystem when killing the last mds
+ command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs rm --yes-i-really-mean-it {{ cephfs }}"
+ delegate_to: "{{ groups[mon_group_name][0] }}"
- name: purge mds store
file:
delegate_to: "{{ mds_to_kill }}"
post_tasks:
- - name: verify that the mds has stopped
- shell: >
- {{ container_exec_cmd | default('') }} ceph --cluster ceph --conf
- /etc/ceph/ceph.conf fs dump | grep mds0
- register: result
- failed_when: result.rc == 0
- delegate_to: "{{ mds_to_kill }}"
-
- name: show ceph health
- command: >
- {{ container_exec_cmd | default('') }} ceph --cluster
- {{ cluster }} -s
+ # final status report so the operator can confirm the mds removal
+ command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s"
delegate_to: "{{ groups[mon_group_name][0] }}"