From: WingKai Ho Date: Tue, 21 Mar 2017 03:08:25 +0000 (+0800) Subject: Create recover-osds-after-ssd-journal-failure.yml X-Git-Tag: v2.2.0~16^2~13 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=7445113dc4f9c03fe180a6cccd3bab787fd72b43;p=ceph-ansible.git Create recover-osds-after-ssd-journal-failure.yml This playbook is used to recover Ceph OSDs after an ssd journal failure. --- diff --git a/infrastructure-playbooks/recover-osds-after-ssd-journal-failure.yml b/infrastructure-playbooks/recover-osds-after-ssd-journal-failure.yml new file mode 100644 index 000000000..4423bd4e3 --- /dev/null +++ b/infrastructure-playbooks/recover-osds-after-ssd-journal-failure.yml @@ -0,0 +1,128 @@ +--- +# This playbook is used to recover Ceph OSDs after an ssd journal failure. +# You will also realise that it’s really simple to bring your +# OSDs back to life after replacing your faulty SSD with a new one. +# +# You should define the `dev_ssds` variable for the host whose ssds were +# replaced after the failure. +# +# For example in host_vars/hostname1.yml +# +# dev_ssds: +#   - device_name: sdd +#     partitions: +#       - index: 1 +#         size: 10G +#         osd_id: 0 +#       - index: 2 +#         size: 10G +#         osd_id: 1 +#   - device_name: sdf +#     partitions: +#       - index: 1 +#         size: 10G +#         osd_id: 2 +# +# @param device_name: The device name of the new ssd +# @param partitions: The custom partition layout of the new ssd +# @param index: The index of this partition +# @param size: The size of this partition +# @param osd_id: The OSD whose journal this partition is for. +# +# ansible-playbook recover-osds-after-ssd-journal-failure.yml +#     Prompts to select which host to recover; defaults to null, +#     in which case no host is selected for recovery. Input the +#     hostname on which to recover osds after the ssd journal failure. +# +# ansible-playbook -e target_host=hostname \ +#     recover-osds-after-ssd-journal-failure.yml +#     Overrides the prompt using the -e option. Can be used in +#     automation scripts to avoid the interactive prompt. 
+ +- hosts: localhost + gather_facts: no + vars_prompt: + - name: target_host + prompt: please enter the target hostname which to recover osds after ssd journal failure + private: no + tasks: + - add_host: + name: "{{ target_host }}" + groups: dynamically_created_hosts + +- hosts: dynamically_created_hosts + vars: + journal_typecode: 45b0969e-9b03-4f30-b4c6-b4b80ceff106 + dev_ssds: [] + + tasks: + - name: load a variable file for dev_ssds + include_vars: "{{ item }}" + with_first_found: + - files: + - "host_vars/{{ ansible_hostname }}.yml" + skip: true + + - name: get osd(s) if directory stat + stat: + path: "/var/lib/ceph/osd/ceph-{{ item.1.osd_id }}/journal_uuid" + register: osds_dir_stat + with_subelements: + - "{{ dev_ssds }}" + - partitions + when: dev_ssds is defined + + - name: exit playbook osd(s) is not on this host + fail: + msg: exit playbook osds is not no this host + with_items: + osds_dir_stat.results + when: + - osds_dir_stat is defined and item.stat.exists == false + + - name: install sgdisk(gdisk) + package: + name: gdisk + state: present + when: dev_ssds is defined + + - name: get osd(s) journal uuid + shell: cat "/var/lib/ceph/osd/ceph-{{ item.1.osd_id }}/journal_uuid" + register: osds_uuid + with_subelements: + - "{{ dev_ssds }}" + - partitions + when: dev_ssds is defined + + - name: make partitions on new ssd + shell: > + sgdisk --new={{item.item[1].index}}:0:+{{item.item[1].size}} "--change-name={{ item.item[1].index }}:ceph journal" --typecode={{ item.item[1].index + }}:{{ journal_typecode }} --partition-guid={{ item.item[1].index }}:{{ item.stdout }} --mbrtogpt -- /dev/{{ item.item[0].device_name }} + with_items: + - "{{ osds_uuid.results }}" + when: dev_ssds is defined + + - name: stop osd(s) service + service: + name: "ceph-osd@{{ item.item[1].osd_id }}" + state: stopped + enabled: no + with_items: + - "{{ osds_uuid.results }}" + when: dev_ssds is defined + + - name: reinitialize osd(s) journal in new ssd + shell: > + ceph-osd -i {{ 
item.item[1].osd_id }} --mkjournal + with_items: + - "{{ osds_uuid.results }}" + when: dev_ssds is defined + + - name: start osd(s) service + service: + name: "ceph-osd@{{ item.item[1].osd_id }}" + state: started + enabled: yes + with_items: + - "{{ osds_uuid.results }}" + when: dev_ssds is defined