git.apps.os.sepia.ceph.com Git - ceph-ansible.git/commitdiff
Create recover-osds-after-ssd-journal-failure.yml
author WingKai Ho <sanguosifang@163.com>
Tue, 21 Mar 2017 03:08:25 +0000 (11:08 +0800)
committer GitHub <noreply@github.com>
Tue, 21 Mar 2017 03:08:25 +0000 (11:08 +0800)
This playbook is used to recover Ceph OSDs after an SSD journal failure.

infrastructure-playbooks/recover-osds-after-ssd-journal-failure.yml [new file with mode: 0644]

diff --git a/infrastructure-playbooks/recover-osds-after-ssd-journal-failure.yml b/infrastructure-playbooks/recover-osds-after-ssd-journal-failure.yml
new file mode 100644 (file)
index 0000000..4423bd4
--- /dev/null
@@ -0,0 +1,128 @@
+---
+# This playbook is used to recover Ceph OSDs after an SSD journal failure.
+# You will also realise that it is really simple to bring your
+# OSDs back to life after replacing your faulty SSD with a new one.
+#
+# You should define the `dev_ssds` variable for each host whose SSD was
+# replaced after the failure.
+#
+# For example in host_vars/hostname1.yml
+#
+# dev_ssds:
+# - device_name: sdd
+#   partitions:
+#   - index: 1
+#     size: 10G
+#     osd_id: 0
+#   - index: 2
+#     size: 10G
+#     osd_id: 1
+# - device_name: sdf
+#   partitions:
+#   - index: 1
+#     size: 10G
+#     osd_id: 2
+#
+# @param device_name: The device name of the new SSD
+# @param partitions:  The custom partition layout of the new SSD
+# @param index:  The index of this partition
+# @param size:  The size of this partition
+# @param osd_id: The OSD whose journal lives on this partition.
+#
+# ansible-playbook recover-osds-after-ssd-journal-failure.yml
+#     Prompts for the host to recover. Enter the hostname of the host
+#     whose OSDs should be recovered after the SSD journal failure.
+#     Defaults to null; if nothing is entered, no host is selected.
+#
+# ansible-playbook -e target_host=hostname \
+#     recover-osds-after-ssd-journal-failure.yml
+#     Overrides the prompt using the -e option. Can be used in
+#     automation scripts to avoid the interactive prompt.
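+#
+# Recovery flow on the target host: read each OSD's saved journal uuid
+# from /var/lib/ceph/osd/ceph-<osd_id>/journal_uuid, recreate the journal
+# partitions on the new SSD with sgdisk (reusing the saved uuid as the
+# partition GUID), then stop each OSD, rebuild its journal with
+# `ceph-osd --mkjournal` and start it again.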
+
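+# The first play only turns the prompted (or -e supplied) hostname into a
+# dynamic inventory group; the actual recovery runs in the second play.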
+- hosts: localhost
+  gather_facts: no
+  vars_prompt:
+  - name: target_host
+    prompt: please enter the target hostname on which to recover OSDs after the SSD journal failure
+    private: no
+  tasks:
+    - add_host:
+        name: "{{ target_host }}"
+        groups: dynamically_created_hosts
+
+- hosts: dynamically_created_hosts
+  vars:
+    journal_typecode: 45b0969e-9b03-4f30-b4c6-b4b80ceff106
+    dev_ssds: []
+
+  tasks:
+  - name: load a variable file for dev_ssds
+    include_vars: "{{ item }}"
+    with_first_found:
+      - files:
+          - "host_vars/{{ ansible_hostname }}.yml"
+        skip: true
+
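+  # Check that every listed OSD's journal_uuid file exists on this host;
+  # the playbook aborts below if any of them is missing.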
+  - name: stat osd(s) journal_uuid file
+    stat:
+      path: "/var/lib/ceph/osd/ceph-{{ item.1.osd_id }}/journal_uuid"
+    register: osds_dir_stat
+    with_subelements:
+      - "{{ dev_ssds }}"
+      - partitions
+    when: dev_ssds is defined
+  - name: exit playbook, osd(s) are not on this host
+    fail:
+      msg: exit playbook, osd(s) are not on this host
+    with_items:
+      - "{{ osds_dir_stat.results }}"
+    when:
+      - osds_dir_stat is defined and not item.stat.exists
+
+  - name: install sgdisk(gdisk)
+    package:
+      name: gdisk
+      state: present
+    when: dev_ssds is defined
+
+  - name: get osd(s) journal uuid
+    shell: cat "/var/lib/ceph/osd/ceph-{{ item.1.osd_id }}/journal_uuid"
+    register: osds_uuid
+    with_subelements:
+      - "{{ dev_ssds }}"
+      - partitions
+    when: dev_ssds is defined
+
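+  # Recreate each journal partition on the new SSD, reusing the saved
+  # journal uuid as the partition GUID so that the OSD's journal symlink
+  # (typically /dev/disk/by-partuuid/<uuid>) points at the new partition.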
+  - name: make partitions on new ssd
+    shell: >
+      sgdisk --new={{ item.item[1].index }}:0:+{{ item.item[1].size }}
+      "--change-name={{ item.item[1].index }}:ceph journal"
+      --typecode={{ item.item[1].index }}:{{ journal_typecode }}
+      --partition-guid={{ item.item[1].index }}:{{ item.stdout }}
+      --mbrtogpt -- /dev/{{ item.item[0].device_name }}
+    with_items:
+      - "{{ osds_uuid.results }}"
+    when: dev_ssds is defined
+
+  - name: stop osd(s) service
+    service:
+      name: "ceph-osd@{{ item.item[1].osd_id }}"
+      state: stopped
+      enabled: no
+    with_items:
+      - "{{ osds_uuid.results }}"
+    when: dev_ssds is defined
+
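+  # `ceph-osd --mkjournal` writes a fresh, empty journal onto the newly
+  # created partition for each affected OSD.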
+  - name: reinitialize osd(s) journal in new ssd
+    shell: >
+      ceph-osd -i {{ item.item[1].osd_id }} --mkjournal
+    with_items:
+      - "{{ osds_uuid.results }}"
+    when: dev_ssds is defined
+
+  - name: start osd(s) service
+    service:
+      name: "ceph-osd@{{ item.item[1].osd_id }}"
+      state: started
+      enabled: yes
+    with_items:
+      - "{{ osds_uuid.results }}"
+    when: dev_ssds is defined