- name: set_fact container_exec_cmd for mon0
set_fact:
- container_exec_cmd: >
- {{ container_binary }} exec ceph-mon-{{ hostvars[groups
- [mon_group_name][0]]['ansible_hostname'] }}
+ # single-line quoted scalar: the folded (>) form embedded newlines/extra
+ # whitespace in the command prefix reused by the later ceph tasks
+ container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}"
when: containerized_deployment | bool
- name: exit playbook, if can not connect to the cluster
- command: >
- {{ container_exec_cmd | default('') }} timeout 5 ceph --cluster
- {{ cluster }} health
+ # 'timeout 5' keeps an unreachable cluster from hanging the probe;
+ # 'until' below retries it before aborting the playbook
+ command: "{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster {{ cluster }} health"
register: ceph_health
until: ceph_health is succeeded
delegate_to: "{{ groups[mon_group_name][0] }}"
mds_to_kill_hostname: "{{ hostvars[mds_to_kill]['ansible_hostname'] }}"
tasks:
- - name: stop mds service(s)
- service:
- name: ceph-mds@{{ mds_to_kill_hostname }}
- state: stopped
- enabled: no
- delegate_to: "{{ mds_to_kill }}"
- failed_when: false
+ # get rid of this as soon as "systemctl stop ceph-mds@$HOSTNAME" also
+ # removes the MDS from the FS map.
+ - name: exit mds if the deployment is containerized
+ when: containerized_deployment | bool
+ # the mds daemon id is the short hostname (it matches the ceph-mds@ unit
+ # name used below), not the inventory name held in mds_to_kill
+ command: "{{ container_exec_cmd | default('') }} ceph tell mds.{{ mds_to_kill_hostname }} exit"
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+
+ - name: stop mds service and verify it
+ block:
+ - name: stop mds service
+ service:
+ name: ceph-mds@{{ mds_to_kill_hostname }}
+ state: stopped
+ enabled: no
+ delegate_to: "{{ mds_to_kill }}"
+ # best effort: the unit may already be stopped or absent; the follow-up
+ # "ensure that the mds is stopped" task performs the real verification
+ failed_when: false
+
+ - name: ensure that the mds is stopped
+ # the unit is ceph-mds@ (hyphen), matching the service stopped above
+ command: "systemctl is-active ceph-mds@{{ mds_to_kill_hostname }}"
+ register: mds_to_kill_status
+ # systemctl is-active exits 0 while the unit is still active
+ failed_when: mds_to_kill_status.rc == 0
+ # 'retries' only takes effect when 'until' is set; poll until inactive
+ until: mds_to_kill_status.rc != 0
+ delegate_to: "{{ mds_to_kill }}"
+ retries: 5
+ delay: 2
+
+ - name: fail if the mds is reported as active or standby
+ block:
+ - name: get ceph status
+ command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s -f json"
+ register: ceph_status
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+
+ # collect the daemon names of all active mds ranks from the fsmap
+ - name: get active mds nodes list
+ set_fact:
+ active_mdss: "{{ active_mdss | default([]) + [item.name] }}"
+ with_items: "{{ (ceph_status.stdout | from_json)['fsmap']['by_rank'] }}"
+
+ # fs dump is fetched separately because the standby daemons are read
+ # from its 'standbys' section (see the next set_fact)
+ - name: get ceph fs dump status
+ command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
+ register: ceph_fs_status
+ delegate_to: "{{ groups[mon_group_name][0] }}"
+
+ - name: create a list of standby mdss
+ set_fact:
+ # without "{{ }}" delimiters the fact would be the literal template
+ # string, never a list, and the later membership check could not match
+ standby_mdss: "{{ (ceph_fs_status.stdout | from_json)['standbys'] | map(attribute='name') | list }}"
+
+ - name: fail if mds just killed is being reported as active or standby
+ fail:
+ msg: "mds node {{ mds_to_kill }} still up and running."
+ when:
+ # compare short hostnames: active_mdss/standby_mdss hold daemon names
+ # (ansible_hostname), while mds_to_kill is the inventory name
+ - (mds_to_kill_hostname in active_mdss | default([])) or
+ (mds_to_kill_hostname in standby_mdss | default([]))
+
+ # NOTE(review): no 'when' guard is visible on this task -- confirm it only
+ # runs when the mds just removed was the last one serving the filesystem
+ - name: delete the filesystem when killing the last mds
+ command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs rm --yes-i-really-mean-it {{ cephfs }}"
+ delegate_to: "{{ groups[mon_group_name][0] }}"
- name: purge mds store
file:
delegate_to: "{{ mds_to_kill }}"
post_tasks:
- - name: verify that the mds has stopped
- shell: >
- {{ container_exec_cmd | default('') }} ceph --cluster ceph --conf
- /etc/ceph/ceph.conf fs dump | grep mds0
- register: result
- failed_when: result.rc == 0
- delegate_to: "{{ mds_to_kill }}"
-
- name: show ceph health
- command: >
- {{ container_exec_cmd | default('') }} ceph --cluster
- {{ cluster }} -s
+ # final status report so the operator can confirm the mds removal
+ command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s"
delegate_to: "{{ groups[mon_group_name][0] }}"