pushd %{buildroot}%{_datarootdir}/ceph-ansible
rm -r roles/ceph-common-coreos
rm group_vars/common-coreoss.yml.sample
+ rm -r infrastructure-playbooks/untested-by-ci
popd
# Strip iscsi files.
+++ /dev/null
----
-# This playbook was made to automate Ceph server maintenance.
-# Typical use case: hardware change.
-# By running this playbook you will set the 'noout' flag on your
-# cluster, which means that OSDs **can't** be marked out
-# of the CRUSH map, but they will be marked as down.
-# Basically we tell the cluster not to move any data, since
-# the operation won't last too long.
-
-- hosts: <your_host>
- gather_facts: False
-
- tasks:
-
- - name: Set the noout flag
- command: ceph osd set noout
- delegate_to: <your_monitor>
-
- - name: Turn off the server
- command: poweroff
-
- - name: Wait for the server to go down
- local_action: >
- wait_for host=<your_host>
- port=22
- state=stopped
-
- - name: Wait for the server to come up
- local_action: >
- wait_for host=<your_host>
- port=22
- delay=10
- timeout=3600
-
- - name: Unset the noout flag
- command: ceph osd unset noout
- delegate_to: <your_monitor>
+++ /dev/null
----
-# This playbook was meant to upgrade a node from Ubuntu to RHEL.
-# We perform a set of actions prior to rebooting the node.
-# The node reboots via PXE and gets its new operating system.
-# This playbook only works for monitors, OSDs and rados gateways.
-# Note that some of the checks are ugly:
-# e.g. the 'when: migration_completed.stat.exists' conditions
-# could be improved with includes, however I wanted to keep a single file...
-#
-
-- hosts: mons
- serial: 1
- sudo: True
-
- vars:
- backup_dir: /tmp/
-
- tasks:
-
- - name: Check if the node has been migrated already
- stat: >
- path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed
- register: migration_completed
- failed_when: false
-
- - name: Check for failed run
- stat: >
- path=/var/lib/ceph/{{ ansible_hostname }}.tar
- register: mon_archive_leftover
-
- - fail: msg="Looks like an archive is already there, please remove it!"
- when: migration_completed.stat.exists == False and mon_archive_leftover.stat.exists == True
-
- - name: Compress the store as much as possible
- command: ceph tell mon.{{ ansible_hostname }} compact
- when: migration_completed.stat.exists == False
-
- - name: Check if sysvinit
- stat: >
- path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit
- register: monsysvinit
- changed_when: False
-
- - name: Check if upstart
- stat: >
- path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart
- register: monupstart
- changed_when: False
-
- - name: Check if init does what it is supposed to do (Sysvinit)
- shell: >
- ps faux|grep -sq [c]eph-mon && service ceph status mon >> /dev/null
- register: ceph_status_sysvinit
- changed_when: False
-
- # can't complete the condition since the previous task never ran...
- - fail: msg="Something is terribly wrong here, sysvinit is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!"
- when: ceph_status_sysvinit.rc != 0 and migration_completed.stat.exists == False and monsysvinit.stat.exists == True
-
- - name: Check if init does what it is supposed to do (upstart)
- shell: >
- ps faux|grep -sq [c]eph-mon && status ceph-mon-all >> /dev/null
- register: ceph_status_upstart
- changed_when: False
-
- - fail: msg="Something is terribly wrong here, upstart is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!"
- when: ceph_status_upstart.rc != 0 and migration_completed.stat.exists == False and monupstart.stat.exists == True
-
- - name: Restart the Monitor after compaction (Upstart)
- service: >
- name=ceph-mon
- state=restarted
- args=id={{ ansible_hostname }}
- when: monupstart.stat.exists == True and migration_completed.stat.exists == False
-
- - name: Restart the Monitor after compaction (Sysvinit)
- service: >
- name=ceph
- state=restarted
- args=mon
- when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False
-
- - name: Wait for the monitor to be up again
- local_action: >
- wait_for
- host={{ ansible_ssh_host | default(inventory_hostname) }}
- port=6789
- timeout=10
- when: migration_completed.stat.exists == False
-
- - name: Stop the monitor (Upstart)
- service: >
- name=ceph-mon
- state=stopped
- args=id={{ ansible_hostname }}
- when: monupstart.stat.exists == True and migration_completed.stat.exists == False
-
- - name: Stop the monitor (Sysvinit)
- service: >
- name=ceph
- state=stopped
- args=mon
- when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False
-
- - name: Wait for the monitor to be down
- local_action: >
- wait_for
- host={{ ansible_ssh_host | default(inventory_hostname) }}
- port=6789
- timeout=10
- state=stopped
- when: migration_completed.stat.exists == False
-
- - name: Create a backup directory
- file: >
- path={{ backup_dir }}/monitors-backups
- state=directory
- owner=root
- group=root
- mode=0644
- delegate_to: "{{ item }}"
- with_items: "{{ groups.backup[0] }}"
- when: migration_completed.stat.exists == False
-
- # NOTE (leseb): should we convert upstart to sysvinit here already?
- - name: Archive monitor stores
- shell: >
- tar -cpvzf - --one-file-system . /etc/ceph/* | cat > {{ ansible_hostname }}.tar
- chdir=/var/lib/ceph/
- creates={{ ansible_hostname }}.tar
- when: migration_completed.stat.exists == False
-
- - name: Scp the Monitor store
- fetch: >
- src=/var/lib/ceph/{{ ansible_hostname }}.tar
- dest={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar
- flat=yes
- when: migration_completed.stat.exists == False
-
- - name: Reboot the server
- command: reboot
- when: migration_completed.stat.exists == False
-
- - name: Wait for the server to come up
- local_action: >
- wait_for
- port=22
- delay=10
- timeout=3600
- when: migration_completed.stat.exists == False
-
- - name: Wait a bit more to be sure that the server is ready
- pause: seconds=20
- when: migration_completed.stat.exists == False
-
- - name: Check if sysvinit
- stat: >
- path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit
- register: monsysvinit
- changed_when: False
-
- - name: Check if upstart
- stat: >
- path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart
- register: monupstart
- changed_when: False
-
- - name: Make sure the monitor is stopped (Upstart)
- service: >
- name=ceph-mon
- state=stopped
- args=id={{ ansible_hostname }}
- when: monupstart.stat.exists == True and migration_completed.stat.exists == False
-
- - name: Make sure the monitor is stopped (Sysvinit)
- service: >
- name=ceph
- state=stopped
- args=mon
- when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False
-
- # NOTE (leseb): 'creates' was added in Ansible 1.6
- - name: Copy and unarchive the monitor store
- unarchive: >
- src={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar
- dest=/var/lib/ceph/
- copy=yes
- mode=0600
- creates=etc/ceph/ceph.conf
- when: migration_completed.stat.exists == False
-
- - name: Copy keys and configs
- shell: >
- cp etc/ceph/* /etc/ceph/
- chdir=/var/lib/ceph/
- when: migration_completed.stat.exists == False
-
- - name: Configure RHEL7 for sysvinit
- shell: find -L /var/lib/ceph/mon/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \;
- when: migration_completed.stat.exists == False
-
- # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary
- # so we directly call sysvinit
- - name: Start the monitor
- service: >
- name=ceph
- state=started
- args=mon
- when: migration_completed.stat.exists == False
-
- - name: Wait for the Monitor to be up again
- local_action: >
- wait_for
- host={{ ansible_ssh_host | default(inventory_hostname) }}
- port=6789
- timeout=10
- when: migration_completed.stat.exists == False
-
- - name: Waiting for the monitor to join the quorum...
- shell: >
- ceph -s | grep monmap | sed 's/.*quorum//' | egrep -q {{ ansible_hostname }}
- register: result
- until: result.rc == 0
- retries: 5
- delay: 10
- delegate_to: "{{ item }}"
- with_items: "{{ groups.backup[0] }}"
- when: migration_completed.stat.exists == False
-
- - name: Done moving to the next monitor
- file: >
- path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed
- state=touch
- owner=root
- group=root
- mode=0600
- when: migration_completed.stat.exists == False
-
-- hosts: osds
- serial: 1
- sudo: True
-
- vars:
- backup_dir: /tmp/
-
- tasks:
- - name: Check if the node has been migrated already
- stat: >
- path=/var/lib/ceph/migration_completed
- register: migration_completed
- failed_when: false
-
- - name: Check for failed run
- stat: >
- path=/var/lib/ceph/{{ ansible_hostname }}.tar
- register: osd_archive_leftover
-
- - fail: msg="Looks like an archive is already there, please remove it!"
- when: migration_completed.stat.exists == False and osd_archive_leftover.stat.exists == True
-
- - name: Check if init does what it is supposed to do (Sysvinit)
- shell: >
- ps faux|grep -sq [c]eph-osd && service ceph status osd >> /dev/null
- register: ceph_status_sysvinit
- changed_when: False
-
- # can't complete the condition since the previous task never ran...
- - fail: msg="Something is terribly wrong here, sysvinit is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!"
- when: ceph_status_sysvinit.rc != 0 and migration_completed.stat.exists == False and monsysvinit.stat.exists == True
-
- - name: Check if init does what it is supposed to do (upstart)
- shell: >
- ps faux|grep -sq [c]eph-osd && initctl list|egrep -sq "ceph-osd \(ceph/.\) start/running, process [0-9][0-9][0-9][0-9]"
- register: ceph_status_upstart
- changed_when: False
-
- - fail: msg="Something is terribly wrong here, upstart is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!"
- when: ceph_status_upstart.rc != 0 and migration_completed.stat.exists == False and monupstart.stat.exists == True
-
- - name: Set the noout flag
- command: ceph osd set noout
- delegate_to: "{{ item }}"
- with_items: "{{ groups[mon_group_name][0] }}"
- when: migration_completed.stat.exists == False
-
- - name: Check if sysvinit
- shell: stat /var/lib/ceph/osd/ceph-*/sysvinit
- register: osdsysvinit
- failed_when: false
- changed_when: False
-
- - name: Check if upstart
- shell: stat /var/lib/ceph/osd/ceph-*/upstart
- register: osdupstart
- failed_when: false
- changed_when: False
-
- - name: Archive ceph configs
- shell: >
- tar -cpvzf - --one-file-system . /etc/ceph/ceph.conf | cat > {{ ansible_hostname }}.tar
- chdir=/var/lib/ceph/
- creates={{ ansible_hostname }}.tar
- when: migration_completed.stat.exists == False
-
- - name: Create backup directory
- file: >
- path={{ backup_dir }}/osds-backups
- state=directory
- owner=root
- group=root
- mode=0644
- delegate_to: "{{ item }}"
- with_items: "{{ groups.backup[0] }}"
- when: migration_completed.stat.exists == False
-
- - name: Scp OSDs dirs and configs
- fetch: >
- src=/var/lib/ceph/{{ ansible_hostname }}.tar
- dest={{ backup_dir }}/osds-backups/
- flat=yes
- when: migration_completed.stat.exists == False
-
- - name: Collect OSD ports
- shell: netstat -tlpn | awk -F ":" '/ceph-osd/ { sub (" .*", "", $2); print $2 }' | uniq
- register: osd_ports
- when: migration_completed.stat.exists == False
-
- - name: Gracefully stop the OSDs (Upstart)
- service: >
- name=ceph-osd-all
- state=stopped
- when: osdupstart.rc == 0 and migration_completed.stat.exists == False
-
- - name: Gracefully stop the OSDs (Sysvinit)
- service: >
- name=ceph
- state=stopped
- args=osd
- when: osdsysvinit.rc == 0 and migration_completed.stat.exists == False
-
- - name: Wait for the OSDs to be down
- local_action: >
- wait_for
- host={{ ansible_ssh_host | default(inventory_hostname) }}
- port={{ item }}
- timeout=10
- state=stopped
- with_items: "{{ osd_ports.stdout_lines }}"
- when: migration_completed.stat.exists == False
-
- - name: Configure RHEL with sysvinit
- shell: find -L /var/lib/ceph/osd/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \;
- when: migration_completed.stat.exists == False
-
- - name: Reboot the server
- command: reboot
- when: migration_completed.stat.exists == False
-
- - name: Wait for the server to come up
- local_action: >
- wait_for
- port=22
- delay=10
- timeout=3600
- when: migration_completed.stat.exists == False
-
- - name: Wait a bit to be sure that the server is ready for scp
- pause: seconds=20
- when: migration_completed.stat.exists == False
-
- # NOTE (leseb): 'creates' was added in Ansible 1.6
- - name: Copy and unarchive the OSD configs
- unarchive: >
- src={{ backup_dir }}/osds-backups/{{ ansible_hostname }}.tar
- dest=/var/lib/ceph/
- copy=yes
- mode=0600
- creates=etc/ceph/ceph.conf
- when: migration_completed.stat.exists == False
-
- - name: Copy keys and configs
- shell: >
- cp etc/ceph/* /etc/ceph/
- chdir=/var/lib/ceph/
- when: migration_completed.stat.exists == False
-
- # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary
- # so we directly call sysvinit
- - name: Start all the OSDs
- service: >
- name=ceph-osd-all
- state=started
- args=osd
- when: migration_completed.stat.exists == False
-
- # NOTE (leseb): this is tricky unless this is set into the ceph.conf
- # listened ports can be predicted, thus they will change after each restart
-# - name: Wait for the OSDs to be up again
-# local_action: >
-# wait_for
-# host={{ ansible_ssh_host | default(inventory_hostname) }}
-# port={{ item }}
-# timeout=30
-# with_items:
-# - "{{ osd_ports.stdout_lines }}"
-
- - name: Waiting for clean PGs...
- shell: >
- test "[""$(ceph -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" = "$(ceph -s -f json | python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')"
- register: result
- until: result.rc == 0
- retries: 10
- delay: 10
- delegate_to: "{{ item }}"
- with_items: "{{ groups.backup[0] }}"
- when: migration_completed.stat.exists == False
-
- - name: Done moving to the next OSD
- file: >
- path=/var/lib/ceph/migration_completed
- state=touch
- owner=root
- group=root
- mode=0600
- when: migration_completed.stat.exists == False
-
- - name: Unset the noout flag
- command: ceph osd unset noout
- delegate_to: "{{ item }}"
- with_items: "{{ groups[mon_group_name][0] }}"
- when: migration_completed.stat.exists == False
-
-- hosts: rgws
- serial: 1
- sudo: True
-
- vars:
- backup_dir: /tmp/
-
- tasks:
- - name: Check if the node has been migrated already
- stat: >
- path=/var/lib/ceph/radosgw/migration_completed
- register: migration_completed
- failed_when: false
-
- - name: Check for failed run
- stat: >
- path=/var/lib/ceph/{{ ansible_hostname }}.tar
- register: rgw_archive_leftover
-
- - fail: msg="Looks like an archive is already there, please remove it!"
- when: migration_completed.stat.exists == False and rgw_archive_leftover.stat.exists == True
-
- - name: Archive rados gateway configs
- shell: >
- tar -cpvzf - --one-file-system . /etc/ceph/* | cat > {{ ansible_hostname }}.tar
- chdir=/var/lib/ceph/
- creates={{ ansible_hostname }}.tar
- when: migration_completed.stat.exists == False
-
- - name: Create backup directory
- file: >
- path={{ backup_dir }}/rgws-backups
- state=directory
- owner=root
- group=root
- mode=0644
- delegate_to: "{{ item }}"
- with_items: "{{ groups.backup[0] }}"
- when: migration_completed.stat.exists == False
-
- - name: Scp RGWs dirs and configs
- fetch: >
- src=/var/lib/ceph/{{ ansible_hostname }}.tar
- dest={{ backup_dir }}/rgws-backups/
- flat=yes
- when: migration_completed.stat.exists == False
-
- - name: Gracefully stop the rados gateway
- service: >
- name={{ item }}
- state=stopped
- with_items:
- - radosgw
- when: migration_completed.stat.exists == False
-
- - name: Wait for radosgw to be down
- local_action: >
- wait_for
- host={{ ansible_ssh_host | default(inventory_hostname) }}
- path=/tmp/radosgw.sock
- state=absent
- timeout=30
- when: migration_completed.stat.exists == False
-
- - name: Reboot the server
- command: reboot
- when: migration_completed.stat.exists == False
-
- - name: Wait for the server to come up
- local_action: >
- wait_for
- port=22
- delay=10
- timeout=3600
- when: migration_completed.stat.exists == False
-
- - name: Wait a bit to be sure that the server is ready for scp
- pause: seconds=20
- when: migration_completed.stat.exists == False
-
- # NOTE (leseb): 'creates' was added in Ansible 1.6
- - name: Copy and unarchive the rados gateway configs
- unarchive: >
- src={{ backup_dir }}/rgws-backups/{{ ansible_hostname }}.tar
- dest=/var/lib/ceph/
- copy=yes
- mode=0600
- creates=etc/ceph/ceph.conf
- when: migration_completed.stat.exists == False
-
- - name: Copy keys and configs
- shell: >
- {{ item }}
- chdir=/var/lib/ceph/
- with_items:
- - cp etc/ceph/* /etc/ceph/
- when: migration_completed.stat.exists == False
-
- - name: Start rados gateway
- service: >
- name={{ item }}
- state=started
- with_items:
- - radosgw
- when: migration_completed.stat.exists == False
-
- - name: Wait for radosgw to be up again
- local_action: >
- wait_for
- host={{ ansible_ssh_host | default(inventory_hostname) }}
- path=/tmp/radosgw.sock
- state=present
- timeout=30
- when: migration_completed.stat.exists == False
-
- - name: Done moving to the next rados gateway
- file: >
- path=/var/lib/ceph/radosgw/migration_completed
- state=touch
- owner=root
- group=root
- mode=0600
- when: migration_completed.stat.exists == False
+++ /dev/null
----
-# This playbook creates a custom partition layout for your OSD hosts.
-# You should define the `devices` variable for every host.
-#
-# For example, in host_vars/hostname1
-#
-# devices:
-# - device_name: sdb
-# partitions:
-# - index: 1
-# size: 10G
-# type: data
-# - index: 2
-# size: 5G
-# type: journal
-# - device_name: sdc
-# partitions:
-# - index: 1
-# size: 10G
-# type: data
-# - index: 2
-# size: 5G
-# type: journal
-#
-- vars:
- osd_group_name: osds
- journal_typecode: 45b0969e-9b03-4f30-b4c6-b4b80ceff106
- data_typecode: 4fbd7e29-9d25-41b8-afd0-062c0ceff05d
- devices: []
- hosts:
- - "{{ osd_group_name }}"
-
- tasks:
-
- - name: load a variable file for devices partition
- include_vars: "{{ item }}"
- with_first_found:
- - files:
- - "host_vars/{{ ansible_hostname }}.yml"
- - "host_vars/default.yml"
- skip: true
-
- - name: exit playbook, if devices not defined
- fail:
- msg: "devices must be define in host_vars/default.yml or host_vars/{{ ansible_hostname }}.yml"
- when: devices is not defined
-
- - name: install sgdisk(gdisk)
- package:
- name: gdisk
- state: present
-
- - name: erase all previous partitions(dangerous!!!)
- shell: sgdisk --zap-all -- /dev/{{item.device_name}}
- with_items: "{{ devices }}"
-
- - name: make osd partitions
- shell: >
- sgdisk --new={{item.1.index}}:0:+{{item.1.size}} "--change-name={{item.1.index}}:ceph {{item.1.type}}"
- "--typecode={{item.1.index}}:{% if item.1.type=='data' %}{{data_typecode}}{% else %}{{journal_typecode}}{% endif %}"
- --mbrtogpt -- /dev/{{item.0.device_name}}
- with_subelements:
- - "{{ devices }}"
- - partitions
-
- - set_fact:
- owner: 167
- group: 167
- when:
- - ansible_os_family == "RedHat"
-
- - set_fact:
- owner: 64045
- group: 64045
- when:
- - ansible_os_family == "Debian"
-
- - name: change partitions ownership
- file:
- path: "/dev/{{item.0.device_name}}{{item.1.index}}"
- owner: "{{ owner | default('root')}}"
- group: "{{ group | default('disk')}}"
- with_subelements:
- - "{{ devices }}"
- - partitions
- when:
- item.0.device_name | match('/dev/([hsv]d[a-z]{1,2}){1,2}$')
-
- - name: change partitions ownership
- file:
- path: "/dev/{{item.0.device_name}}p{{item.1.index}}"
- owner: "{{ owner | default('root')}}"
- group: "{{ group | default('disk')}}"
- with_subelements:
- - "{{ devices }}"
- - partitions
- when:
- item.0.device_name | match('/dev/(cciss/c[0-9]d[0-9]|nvme[0-9]n[0-9]){1,2}$')
-...
\ No newline at end of file
+++ /dev/null
----
-# This playbook is used to migrate active osd(s) journals to an SSD.
-#
-# You should define the `osds_journal_devices` variable for the host whose osd(s) journals are being migrated.
-#
-# For example in host_vars/hostname1.yml
-#
-# osds_journal_devices:
-# - device_name: /dev/sdd
-# partitions:
-# - index: 1
-# size: 10G
-# osd_id: 0
-# - index: 2
-# size: 10G
-# osd_id: 1
-# - device_name: /dev/sdf
-# partitions:
-# - index: 1
-# size: 10G
-# osd_id: 2
-#
-# @param device_name: The full device path of the new ssd.
-# @param partitions: The custom partition layout of the ssd.
-# @param index: The index of this partition.
-# @param size: The size of this partition.
-# @param osd_id: Which osd's journal this partition is for.
-#
-# ansible-playbook migrate-journal-to-ssd.yml
-# The playbook will migrate the osd(s) journals to the ssd device you define in host_vars.
-
-- vars:
- osd_group_name: osds
- journal_typecode: 45b0969e-9b03-4f30-b4c6-b4b80ceff106
- osds_journal_devices: []
- hosts:
- - "{{ osd_group_name }}"
- serial: 1
- tasks:
-
- - name: stat osd(s) journal_uuid file
- stat:
- path: "/var/lib/ceph/osd/{{ cluster }}-{{ item.1.osd_id }}/journal_uuid"
- register: osds_dir_stat
- with_subelements:
- - "{{ osds_journal_devices }}"
- - partitions
-
- - name: exit playbook, osd(s) are not on this host
- fail:
- msg: exit playbook, osd(s) are not on this host
- with_items:
- osds_dir_stat.results
- when:
- - osds_dir_stat is defined and item.stat.exists == false
-
- - name: install sgdisk(gdisk)
- package:
- name: gdisk
- state: present
- when: osds_journal_devices is defined
-
- - name: generate uuid for osds journal
- command: uuidgen
- register: osds
- with_subelements:
- - "{{ osds_journal_devices }}"
- - partitions
-
- - name: make osd partitions on ssd
- shell: >
- sgdisk --new={{item.item[1].index}}:0:+{{item.item[1].size}} "--change-name={{ item.item[1].index }}:ceph journal"
- --typecode={{ item.item[1].index }}:{{ journal_typecode }}
- --partition-guid={{ item.item[1].index }}:{{ item.stdout }}
- --mbrtogpt -- {{ item.item[0].device_name }}
- with_items:
- - "{{ osds.results }}"
-
- - name: stop osd(s) service
- service:
- name: "ceph-osd@{{ item.item[1].osd_id }}"
- state: stopped
- with_items:
- - "{{ osds.results }}"
-
- - name: flush osd(s) journal
- command: ceph-osd -i {{ item.item[1].osd_id }} --flush-journal --cluster {{ cluster }}
- with_items:
- - "{{ osds.results }}"
- when: osds_journal_devices is defined
-
- - name: update osd(s) journal soft link
- command: ln -sf /dev/disk/by-partuuid/{{ item.stdout }} /var/lib/ceph/osd/{{ cluster }}-{{ item.item[1].osd_id }}/journal
- with_items:
- - "{{ osds.results }}"
-
- - name: update osd(s) journal uuid
- command: echo {{ item.stdout }} > /var/lib/ceph/osd/{{ cluster }}-{{ item.item[1].osd_id }}/journal_uuid
- with_items:
- - "{{ osds.results }}"
-
- - name: initialize osd(s) new journal
- command: ceph-osd -i {{ item.item[1].osd_id }} --mkjournal --cluster {{ cluster }}
- with_items:
- - "{{ osds.results }}"
-
- - name: start osd(s) service
- service:
- name: "ceph-osd@{{ item.item[1].osd_id }}"
- state: started
- with_items:
- - "{{ osds.results }}"
+++ /dev/null
----
-# Nukes a multisite config
-- hosts: rgws
- become: True
- tasks:
- - include: roles/ceph-rgw/tasks/multisite/destroy.yml
-
- handlers:
- - include: roles/ceph-rgw/handlers/main.yml
- # Ansible 2.1.0 bug will ignore included handlers without this
- static: True
+++ /dev/null
----
-# This playbook is used to recover Ceph OSDs after an ssd journal failure.
-# You will also realise that it's really simple to bring your
-# OSDs back to life after replacing your faulty SSD with a new one.
-#
-# You should define the `dev_ssds` variable for the host whose ssds were
-# replaced after the failure.
-#
-# For example in host_vars/hostname1.yml
-#
-# dev_ssds:
-# - device_name: /dev/sdd
-# partitions:
-# - index: 1
-# size: 10G
-# osd_id: 0
-# - index: 2
-# size: 10G
-# osd_id: 1
-# - device_name: /dev/sdf
-# partitions:
-# - index: 1
-# size: 10G
-# osd_id: 2
-#
-# @param device_name: The full device path of the new ssd
-# @param partitions: The custom partition layout of the new ssd
-# @param index: The index of this partition
-# @param size: The size of this partition
-# @param osd_id: Which osd's journal this partition is for.
-#
-# ansible-playbook recover-osds-after-ssd-journal-failure.yml
-# Prompts you to select which host to recover; defaults to null,
-# which selects no host. Enter the hostname of the host whose osds
-# need to be recovered after the ssd journal failure.
-#
-# ansible-playbook -e target_host=hostname \
-#    recover-osds-after-ssd-journal-failure.yml
-# Overrides the prompt using the -e option. Can be used in
-# automation scripts to avoid the interactive prompt.
-
-- hosts: localhost
- gather_facts: no
- vars_prompt:
- - name: target_host
- prompt: please enter the target hostname on which to recover osds after ssd journal failure
- private: no
- tasks:
- - add_host:
- name: "{{ target_host }}"
- groups: dynamically_created_hosts
-
-- hosts: dynamically_created_hosts
- vars:
- journal_typecode: 45b0969e-9b03-4f30-b4c6-b4b80ceff106
- dev_ssds: []
-
- tasks:
- - fail: msg="please define dev_ssds variable"
- when: dev_ssds|length <= 0
-
- - name: stat osd(s) journal_uuid file
- stat:
- path: "/var/lib/ceph/osd/{{ cluster }}-{{ item.1.osd_id }}/journal_uuid"
- register: osds_dir_stat
- with_subelements:
- - "{{ dev_ssds }}"
- - partitions
-
- - name: exit playbook, osd(s) are not on this host
- fail:
- msg: exit playbook, osd(s) are not on this host
- with_items:
- osds_dir_stat.results
- when:
- - osds_dir_stat is defined
- - item.stat.exists == false
-
- - name: install sgdisk(gdisk)
- package:
- name: gdisk
- state: present
-
- - name: get osd(s) journal uuid
- command: cat "/var/lib/ceph/osd/{{ cluster }}-{{ item.1.osd_id }}/journal_uuid"
- register: osds_uuid
- with_subelements:
- - "{{ dev_ssds }}"
- - partitions
-
- - name: make partitions on new ssd
- shell: >
- sgdisk --new={{item.item[1].index}}:0:+{{item.item[1].size}} "--change-name={{ item.item[1].index }}:ceph journal"
- --typecode={{ item.item[1].index }}:{{ journal_typecode }}
- --partition-guid={{ item.item[1].index }}:{{ item.stdout }}
- --mbrtogpt -- {{ item.item[0].device_name }}
- with_items:
- - "{{ osds_uuid.results }}"
-
- - name: stop osd(s) service
- service:
- name: "ceph-osd@{{ item.item[1].osd_id }}"
- state: stopped
- with_items:
- - "{{ osds_uuid.results }}"
-
- - name: reinitialize osd(s) journal in new ssd
- command: ceph-osd -i {{ item.item[1].osd_id }} --mkjournal --cluster {{ cluster }}
- with_items:
- - "{{ osds_uuid.results }}"
-
- - name: start osd(s) service
- service:
- name: "ceph-osd@{{ item.item[1].osd_id }}"
- state: started
- with_items:
- - "{{ osds_uuid.results }}"
--- /dev/null
+---
+# This playbook was made to automate Ceph server maintenance.
+# Typical use case: hardware change.
+# By running this playbook you will set the 'noout' flag on your
+# cluster, which means that OSDs **can't** be marked out
+# of the CRUSH map, but they will be marked as down.
+# Basically we tell the cluster not to move any data, since
+# the operation won't last too long.
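+#
+# A minimal invocation sketch (assuming you substitute <your_host> and
+# <your_monitor> below and save this playbook as, say, cluster-maintenance.yml):
+#   ansible-playbook -i <your_inventory> cluster-maintenance.yml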
+
+- hosts: <your_host>
+ gather_facts: False
+
+ tasks:
+
+ - name: Set the noout flag
+ command: ceph osd set noout
+ delegate_to: <your_monitor>
+
+ - name: Turn off the server
+ command: poweroff
+
+ - name: Wait for the server to go down
+ local_action: >
+ wait_for host=<your_host>
+ port=22
+ state=stopped
+
+ - name: Wait for the server to come up
+ local_action: >
+ wait_for host=<your_host>
+ port=22
+ delay=10
+ timeout=3600
+
+ - name: Unset the noout flag
+ command: ceph osd unset noout
+ delegate_to: <your_monitor>
--- /dev/null
+---
+# This playbook was meant to upgrade a node from Ubuntu to RHEL.
+# We perform a set of actions prior to rebooting the node.
+# The node reboots via PXE and gets its new operating system.
+# This playbook only works for monitors, OSDs and rados gateways.
+# Note that some of the checks are ugly:
+# e.g. the 'when: migration_completed.stat.exists' conditions
+# could be improved with includes, however I wanted to keep a single file...
+#
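+# The plays below run with 'serial: 1', so nodes are migrated one at a time.
+# A hypothetical invocation that limits the run to the monitors first
+# (the playbook filename here is a placeholder) could look like:
+#   ansible-playbook <this-playbook>.yml --limit mons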
+
+- hosts: mons
+ serial: 1
+ sudo: True
+
+ vars:
+ backup_dir: /tmp/
+
+ tasks:
+
+ - name: Check if the node has been migrated already
+ stat: >
+ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed
+ register: migration_completed
+ failed_when: false
+
+ - name: Check for failed run
+ stat: >
+ path=/var/lib/ceph/{{ ansible_hostname }}.tar
+ register: mon_archive_leftover
+
+ - fail: msg="Looks like an archive is already there, please remove it!"
+ when: migration_completed.stat.exists == False and mon_archive_leftover.stat.exists == True
+
+ - name: Compress the store as much as possible
+ command: ceph tell mon.{{ ansible_hostname }} compact
+ when: migration_completed.stat.exists == False
+
+ - name: Check if sysvinit
+ stat: >
+ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit
+ register: monsysvinit
+ changed_when: False
+
+ - name: Check if upstart
+ stat: >
+ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart
+ register: monupstart
+ changed_when: False
+
+ - name: Check if init does what it is supposed to do (Sysvinit)
+ shell: >
+ ps faux|grep -sq [c]eph-mon && service ceph status mon >> /dev/null
+ register: ceph_status_sysvinit
+ changed_when: False
+
+ # can't complete the condition since the previous task never ran...
+ - fail: msg="Something is terribly wrong here, sysvinit is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!"
+ when: ceph_status_sysvinit.rc != 0 and migration_completed.stat.exists == False and monsysvinit.stat.exists == True
+
+ - name: Check if init does what it is supposed to do (upstart)
+ shell: >
+ ps faux|grep -sq [c]eph-mon && status ceph-mon-all >> /dev/null
+ register: ceph_status_upstart
+ changed_when: False
+
+ - fail: msg="Something is terribly wrong here, upstart is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!"
+ when: ceph_status_upstart.rc != 0 and migration_completed.stat.exists == False and monupstart.stat.exists == True
+
+ - name: Restart the Monitor after compaction (Upstart)
+ service: >
+ name=ceph-mon
+ state=restarted
+ args=id={{ ansible_hostname }}
+ when: monupstart.stat.exists == True and migration_completed.stat.exists == False
+
+ - name: Restart the Monitor after compaction (Sysvinit)
+ service: >
+ name=ceph
+ state=restarted
+ args=mon
+ when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False
+
+ - name: Wait for the monitor to be up again
+ local_action: >
+ wait_for
+ host={{ ansible_ssh_host | default(inventory_hostname) }}
+ port=6789
+ timeout=10
+ when: migration_completed.stat.exists == False
+
+ - name: Stop the monitor (Upstart)
+ service: >
+ name=ceph-mon
+ state=stopped
+ args=id={{ ansible_hostname }}
+ when: monupstart.stat.exists == True and migration_completed.stat.exists == False
+
+ - name: Stop the monitor (Sysvinit)
+ service: >
+ name=ceph
+ state=stopped
+ args=mon
+ when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False
+
+ - name: Wait for the monitor to be down
+ local_action: >
+ wait_for
+ host={{ ansible_ssh_host | default(inventory_hostname) }}
+ port=6789
+ timeout=10
+ state=stopped
+ when: migration_completed.stat.exists == False
+
+ - name: Create a backup directory
+ file: >
+ path={{ backup_dir }}/monitors-backups
+ state=directory
+ owner=root
+ group=root
+ mode=0644
+ delegate_to: "{{ item }}"
+ with_items: "{{ groups.backup[0] }}"
+ when: migration_completed.stat.exists == False
+
+ # NOTE (leseb): should we convert upstart to sysvinit here already?
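+ # NOTE: the archive task below streams the tarball through 'cat' into
+ # /var/lib/ceph/{{ ansible_hostname }}.tar and also includes the /etc/ceph
+ # files; 'creates=' keeps it from re-archiving the store on a second run.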
+ - name: Archive monitor stores
+ shell: >
+ tar -cpvzf - --one-file-system . /etc/ceph/* | cat > {{ ansible_hostname }}.tar
+ chdir=/var/lib/ceph/
+ creates={{ ansible_hostname }}.tar
+ when: migration_completed.stat.exists == False
+
+ - name: Scp the Monitor store
+ fetch: >
+ src=/var/lib/ceph/{{ ansible_hostname }}.tar
+ dest={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar
+ flat=yes
+ when: migration_completed.stat.exists == False
+
+ - name: Reboot the server
+ command: reboot
+ when: migration_completed.stat.exists == False
+
+ - name: Wait for the server to come up
+ local_action: >
+ wait_for
+ port=22
+ delay=10
+ timeout=3600
+ when: migration_completed.stat.exists == False
+
+ - name: Wait a bit more to be sure that the server is ready
+ pause: seconds=20
+ when: migration_completed.stat.exists == False
+
+ - name: Check if sysvinit
+ stat: >
+ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit
+ register: monsysvinit
+ changed_when: False
+
+ - name: Check if upstart
+ stat: >
+ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart
+ register: monupstart
+ changed_when: False
+
+ - name: Make sure the monitor is stopped (Upstart)
+ service: >
+ name=ceph-mon
+ state=stopped
+ args=id={{ ansible_hostname }}
+ when: monupstart.stat.exists == True and migration_completed.stat.exists == False
+
+ - name: Make sure the monitor is stopped (Sysvinit)
+ service: >
+ name=ceph
+ state=stopped
+ args=mon
+ when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False
+
+ # NOTE (leseb): 'creates' was added in Ansible 1.6
+ - name: Copy and unarchive the monitor store
+ unarchive: >
+ src={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar
+ dest=/var/lib/ceph/
+ copy=yes
+ mode=0600
+ creates=etc/ceph/ceph.conf
+ when: migration_completed.stat.exists == False
+
+ - name: Copy keys and configs
+ shell: >
+ cp etc/ceph/* /etc/ceph/
+ chdir=/var/lib/ceph/
+ when: migration_completed.stat.exists == False
+
+ - name: Configure RHEL7 for sysvinit
+ shell: find -L /var/lib/ceph/mon/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \;
+ when: migration_completed.stat.exists == False
+
+ # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary
+ # so we directly call sysvinit
+ - name: Start the monitor
+ service: >
+ name=ceph
+ state=started
+ args=mon
+ when: migration_completed.stat.exists == False
+
+ - name: Wait for the Monitor to be up again
+ local_action: >
+ wait_for
+ host={{ ansible_ssh_host | default(inventory_hostname) }}
+ port=6789
+ timeout=10
+ when: migration_completed.stat.exists == False
+
+ - name: Waiting for the monitor to join the quorum...
+ shell: >
+ ceph -s | grep monmap | sed 's/.*quorum//' | egrep -q {{ ansible_hostname }}
+ register: result
+ until: result.rc == 0
+ retries: 5
+ delay: 10
+ delegate_to: "{{ item }}"
+ with_items: "{{ groups.backup[0] }}"
+ when: migration_completed.stat.exists == False
+
+ - name: Done moving to the next monitor
+ file: >
+ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed
+ state=touch
+ owner=root
+ group=root
+ mode=0600
+ when: migration_completed.stat.exists == False
+
+- hosts: osds
+ serial: 1
+ sudo: True
+
+ vars:
+ backup_dir: /tmp/
+
+ tasks:
+ - name: Check if the node has been migrated already
+ stat: >
+ path=/var/lib/ceph/migration_completed
+ register: migration_completed
+ failed_when: false
+
+ - name: Check for failed run
+ stat: >
+ path=/var/lib/ceph/{{ ansible_hostname }}.tar
+ register: osd_archive_leftover
+
+ - fail: msg="Looks like an archive is already there, please remove it!"
+ when: migration_completed.stat.exists == False and osd_archive_leftover.stat.exists == True
+
+ - name: Check if init does what it is supposed to do (Sysvinit)
+ shell: >
+ ps faux|grep -sq [c]eph-osd && service ceph status osd >> /dev/null
+ register: ceph_status_sysvinit
+ changed_when: False
+
+ # can't complete the condition since the previous task never ran...
+ - fail: msg="Something is terribly wrong here, sysvinit is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!"
+ when: ceph_status_sysvinit.rc != 0 and migration_completed.stat.exists == False and monsysvinit.stat.exists == True
+
+ - name: Check if init does what it is supposed to do (upstart)
+ shell: >
+ ps faux|grep -sq [c]eph-osd && initctl list|egrep -sq "ceph-osd \(ceph/.\) start/running, process [0-9][0-9][0-9][0-9]"
+ register: ceph_status_upstart
+ changed_when: False
+
+ - fail: msg="Something is terribly wrong here, upstart is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!"
+ when: ceph_status_upstart.rc != 0 and migration_completed.stat.exists == False and monupstart.stat.exists == True
+
+ - name: Set the noout flag
+ command: ceph osd set noout
+ delegate_to: "{{ item }}"
+ with_items: "{{ groups[mon_group_name][0] }}"
+ when: migration_completed.stat.exists == False
+
+ - name: Check if sysvinit
+ shell: stat /var/lib/ceph/osd/ceph-*/sysvinit
+ register: osdsysvinit
+ failed_when: false
+ changed_when: False
+
+ - name: Check if upstart
+ shell: stat /var/lib/ceph/osd/ceph-*/upstart
+ register: osdupstart
+ failed_when: false
+ changed_when: False
+
+ - name: Archive ceph configs
+ shell: >
+ tar -cpvzf - --one-file-system . /etc/ceph/ceph.conf | cat > {{ ansible_hostname }}.tar
+ chdir=/var/lib/ceph/
+ creates={{ ansible_hostname }}.tar
+ when: migration_completed.stat.exists == False
+
+ - name: Create backup directory
+ file: >
+ path={{ backup_dir }}/osds-backups
+ state=directory
+ owner=root
+ group=root
+ mode=0644
+ delegate_to: "{{ item }}"
+ with_items: "{{ groups.backup[0] }}"
+ when: migration_completed.stat.exists == False
+
+ - name: Scp OSDs dirs and configs
+ fetch: >
+ src=/var/lib/ceph/{{ ansible_hostname }}.tar
+ dest={{ backup_dir }}/osds-backups/
+ flat=yes
+ when: migration_completed.stat.exists == False
+
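+ # NOTE: the netstat pipeline below collects the TCP ports the local
+ # ceph-osd processes listen on; they are reused further down to wait
+ # for the OSDs to actually be down.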
+ - name: Collect OSD ports
+ shell: netstat -tlpn | awk -F ":" '/ceph-osd/ { sub (" .*", "", $2); print $2 }' | uniq
+ register: osd_ports
+ when: migration_completed.stat.exists == False
+
+ - name: Gracefully stop the OSDs (Upstart)
+ service: >
+ name=ceph-osd-all
+ state=stopped
+ when: osdupstart.rc == 0 and migration_completed.stat.exists == False
+
+ - name: Gracefully stop the OSDs (Sysvinit)
+ service: >
+ name=ceph
+ state=stopped
+ args=osd
+ when: osdsysvinit.rc == 0 and migration_completed.stat.exists == False
+
+ - name: Wait for the OSDs to be down
+ local_action: >
+ wait_for
+ host={{ ansible_ssh_host | default(inventory_hostname) }}
+ port={{ item }}
+ timeout=10
+ state=stopped
+ with_items: "{{ osd_ports.stdout_lines }}"
+ when: migration_completed.stat.exists == False
+
+ - name: Configure RHEL with sysvinit
+ shell: find -L /var/lib/ceph/osd/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \;
+ when: migration_completed.stat.exists == False
+
+ - name: Reboot the server
+ command: reboot
+ when: migration_completed.stat.exists == False
+
+ - name: Wait for the server to come up
+ local_action: >
+ wait_for
+ port=22
+ delay=10
+ timeout=3600
+ when: migration_completed.stat.exists == False
+
+ - name: Wait a bit to be sure that the server is ready for scp
+ pause: seconds=20
+ when: migration_completed.stat.exists == False
+
+ # NOTE (leseb): 'creates' was added in Ansible 1.6
+ - name: Copy and unarchive the OSD configs
+ unarchive: >
+ src={{ backup_dir }}/osds-backups/{{ ansible_hostname }}.tar
+ dest=/var/lib/ceph/
+ copy=yes
+ mode=0600
+ creates=etc/ceph/ceph.conf
+ when: migration_completed.stat.exists == False
+
+ - name: Copy keys and configs
+ shell: >
+ cp etc/ceph/* /etc/ceph/
+ chdir=/var/lib/ceph/
+ when: migration_completed.stat.exists == False
+
+ # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary
+ # so we directly call sysvinit
+ - name: Start all the OSDs
+ service: >
+ name=ceph-osd-all
+ state=started
+ args=osd
+ when: migration_completed.stat.exists == False
+
+ # NOTE (leseb): this is tricky unless this is set into the ceph.conf
+ # listened ports can be predicted, thus they will change after each restart
+# - name: Wait for the OSDs to be up again
+# local_action: >
+# wait_for
+# host={{ ansible_ssh_host | default(inventory_hostname) }}
+# port={{ item }}
+# timeout=30
+# with_items:
+# - "{{ osd_ports.stdout_lines }}"
+
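+ # NOTE: the shell test below compares the cluster's total PG count with
+ # the number of PGs reporting "active+clean"; the retry loop only succeeds
+ # once every PG is active+clean.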
+ - name: Waiting for clean PGs...
+ shell: >
+ test "[""$(ceph -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" = "$(ceph -s -f json | python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')"
+ register: result
+ until: result.rc == 0
+ retries: 10
+ delay: 10
+ delegate_to: "{{ item }}"
+ with_items: "{{ groups.backup[0] }}"
+ when: migration_completed.stat.exists == False
+
+ - name: Done moving to the next OSD
+ file: >
+ path=/var/lib/ceph/migration_completed
+ state=touch
+ owner=root
+ group=root
+ mode=0600
+ when: migration_completed.stat.exists == False
+
+ - name: Unset the noout flag
+ command: ceph osd unset noout
+ delegate_to: "{{ item }}"
+ with_items: "{{ groups[mon_group_name][0] }}"
+ when: migration_completed.stat.exists == False
+
+- hosts: rgws
+ serial: 1
+ sudo: True
+
+ vars:
+ backup_dir: /tmp/
+
+ tasks:
+ - name: Check if the node has been migrated already
+ stat: >
+ path=/var/lib/ceph/radosgw/migration_completed
+ register: migration_completed
+ failed_when: false
+
+ - name: Check for failed run
+ stat: >
+ path=/var/lib/ceph/{{ ansible_hostname }}.tar
+ register: rgw_archive_leftover
+
+ - fail: msg="Looks like an archive is already there, please remove it!"
+ when: migration_completed.stat.exists == False and rgw_archive_leftover.stat.exists == True
+
+ - name: Archive rados gateway configs
+ shell: >
+ tar -cpvzf - --one-file-system . /etc/ceph/* | cat > {{ ansible_hostname }}.tar
+ chdir=/var/lib/ceph/
+ creates={{ ansible_hostname }}.tar
+ when: migration_completed.stat.exists == False
+
+ - name: Create backup directory
+ file: >
+ path={{ backup_dir }}/rgws-backups
+ state=directory
+ owner=root
+ group=root
+ mode=0644
+ delegate_to: "{{ item }}"
+ with_items: "{{ groups.backup[0] }}"
+ when: migration_completed.stat.exists == False
+
+ - name: Scp RGWs dirs and configs
+ fetch: >
+ src=/var/lib/ceph/{{ ansible_hostname }}.tar
+ dest={{ backup_dir }}/rgws-backups/
+ flat=yes
+ when: migration_completed.stat.exists == False
+
+ - name: Gracefully stop the rados gateway
+ service: >
+ name={{ item }}
+ state=stopped
+ with_items:
+ - radosgw
+ when: migration_completed.stat.exists == False
+
+ - name: Wait for radosgw to be down
+ local_action: >
+ wait_for
+ host={{ ansible_ssh_host | default(inventory_hostname) }}
+ path=/tmp/radosgw.sock
+ state=absent
+ timeout=30
+ when: migration_completed.stat.exists == False
+
+ - name: Reboot the server
+ command: reboot
+ when: migration_completed.stat.exists == False
+
+ - name: Wait for the server to come up
+ local_action: >
+ wait_for
+ port=22
+ delay=10
+ timeout=3600
+ when: migration_completed.stat.exists == False
+
+ - name: Wait a bit to be sure that the server is ready for scp
+ pause: seconds=20
+ when: migration_completed.stat.exists == False
+
+ # NOTE (leseb): 'creates' was added in Ansible 1.6
+ - name: Copy and unarchive the rados gateway configs
+ unarchive: >
+ src={{ backup_dir }}/rgws-backups/{{ ansible_hostname }}.tar
+ dest=/var/lib/ceph/
+ copy=yes
+ mode=0600
+ creates=etc/ceph/ceph.conf
+ when: migration_completed.stat.exists == False
+
+ - name: Copy keys and configs
+ shell: >
+ {{ item }}
+ chdir=/var/lib/ceph/
+ with_items:
+ - cp etc/ceph/* /etc/ceph/
+ when: migration_completed.stat.exists == False
+
+ - name: Start rados gateway
+ service: >
+ name={{ item }}
+ state=started
+ with_items:
+ - radosgw
+ when: migration_completed.stat.exists == False
+
+ - name: Wait for radosgw to be up again
+ local_action: >
+ wait_for
+ host={{ ansible_ssh_host | default(inventory_hostname) }}
+ path=/tmp/radosgw.sock
+ state=present
+ timeout=30
+ when: migration_completed.stat.exists == False
+
+ - name: Done moving to the next rados gateway
+ file: >
+ path=/var/lib/ceph/radosgw/migration_completed
+ state=touch
+ owner=root
+ group=root
+ mode=0600
+ when: migration_completed.stat.exists == False
--- /dev/null
+---
+# This playbook creates a custom partition layout for your OSD hosts.
+# You should define the `devices` variable for every host.
+#
+# For example, in host_vars/hostname1
+#
+# devices:
+# - device_name: sdb
+# partitions:
+# - index: 1
+# size: 10G
+# type: data
+# - index: 2
+# size: 5G
+# type: journal
+# - device_name: sdc
+# partitions:
+# - index: 1
+# size: 10G
+# type: data
+# - index: 2
+# size: 5G
+# type: journal
+#
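+# With the example above, the "make osd partitions" task below would expand,
+# for the first partition of sdb, to roughly:
+#   sgdisk --new=1:0:+10G "--change-name=1:ceph data" \
+#     "--typecode=1:4fbd7e29-9d25-41b8-afd0-062c0ceff05d" --mbrtogpt -- /dev/sdb
+#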
+- vars:
+ osd_group_name: osds
+ journal_typecode: 45b0969e-9b03-4f30-b4c6-b4b80ceff106
+ data_typecode: 4fbd7e29-9d25-41b8-afd0-062c0ceff05d
+ devices: []
+ hosts:
+ - "{{ osd_group_name }}"
+
+ tasks:
+
+ - name: load a variable file for devices partition
+ include_vars: "{{ item }}"
+ with_first_found:
+ - files:
+ - "host_vars/{{ ansible_hostname }}.yml"
+ - "host_vars/default.yml"
+ skip: true
+
+ - name: exit playbook, if devices not defined
+ fail:
+ msg: "devices must be define in host_vars/default.yml or host_vars/{{ ansible_hostname }}.yml"
+ when: devices is not defined
+
+ - name: install sgdisk(gdisk)
+ package:
+ name: gdisk
+ state: present
+
+ - name: erase all previous partitions(dangerous!!!)
+ shell: sgdisk --zap-all -- /dev/{{item.device_name}}
+ with_items: "{{ devices }}"
+
+ - name: make osd partitions
+ shell: >
+ sgdisk --new={{item.1.index}}:0:+{{item.1.size}} "--change-name={{item.1.index}}:ceph {{item.1.type}}"
+ "--typecode={{item.1.index}}:{% if item.1.type=='data' %}{{data_typecode}}{% else %}{{journal_typecode}}{% endif %}"
+ --mbrtogpt -- /dev/{{item.0.device_name}}
+ with_subelements:
+ - "{{ devices }}"
+ - partitions
+
+ - set_fact:
+ owner: 167
+ group: 167
+ when:
+ - ansible_os_family == "RedHat"
+
+ - set_fact:
+ owner: 64045
+ group: 64045
+ when:
+ - ansible_os_family == "Debian"
+
+ - name: change partitions ownership
+ file:
+ path: "/dev/{{item.0.device_name}}{{item.1.index}}"
+ owner: "{{ owner | default('root')}}"
+ group: "{{ group | default('disk')}}"
+ with_subelements:
+ - "{{ devices }}"
+ - partitions
+ when:
+ item.0.device_name | match('/dev/([hsv]d[a-z]{1,2}){1,2}$')
+
+ - name: change partitions ownership
+ file:
+ path: "/dev/{{item.0.device_name}}p{{item.1.index}}"
+ owner: "{{ owner | default('root')}}"
+ group: "{{ group | default('disk')}}"
+ with_subelements:
+ - "{{ devices }}"
+ - partitions
+ when:
+ item.0.device_name | match('/dev/(cciss/c[0-9]d[0-9]|nvme[0-9]n[0-9]){1,2}$')
+...
\ No newline at end of file
--- /dev/null
+---
+# This playbook is used to migrate active osd(s) journals to an SSD.
+#
+# You should define the `osds_journal_devices` variable for the host whose osd(s) journals are being migrated.
+#
+# For example in host_vars/hostname1.yml
+#
+# osds_journal_devices:
+# - device_name: /dev/sdd
+# partitions:
+# - index: 1
+# size: 10G
+# osd_id: 0
+# - index: 2
+# size: 10G
+# osd_id: 1
+# - device_name: /dev/sdf
+# partitions:
+# - index: 1
+# size: 10G
+# osd_id: 2
+#
+# @param device_name: The full device path of the new ssd.
+# @param partitions: The custom partition layout of the ssd.
+# @param index: The index of this partition.
+# @param size: The size of this partition.
+# @param osd_id: Which osd's journal this partition is for.
+#
+# ansible-playbook migrate-journal-to-ssd.yml
+# The playbook will migrate the osd(s) journals to the ssd device you define in host_vars.
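+#
+# In short, for each listed partition the play below: creates the journal
+# partition on the SSD, stops the corresponding OSD, flushes its old journal,
+# re-points the journal symlink and journal_uuid at the new partition, runs
+# 'ceph-osd --mkjournal', and starts the OSD again.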
+
+- vars:
+ osd_group_name: osds
+ journal_typecode: 45b0969e-9b03-4f30-b4c6-b4b80ceff106
+ osds_journal_devices: []
+ hosts:
+ - "{{ osd_group_name }}"
+ serial: 1
+ tasks:
+
+ - name: stat osd(s) journal_uuid file
+ stat:
+ path: "/var/lib/ceph/osd/{{ cluster }}-{{ item.1.osd_id }}/journal_uuid"
+ register: osds_dir_stat
+ with_subelements:
+ - "{{ osds_journal_devices }}"
+ - partitions
+
+ - name: exit playbook, osd(s) are not on this host
+ fail:
+ msg: exit playbook, osd(s) are not on this host
+ with_items:
+ osds_dir_stat.results
+ when:
+ - osds_dir_stat is defined and item.stat.exists == false
+
+ - name: install sgdisk(gdisk)
+ package:
+ name: gdisk
+ state: present
+ when: osds_journal_devices is defined
+
+ - name: generate uuid for osds journal
+ command: uuidgen
+ register: osds
+ with_subelements:
+ - "{{ osds_journal_devices }}"
+ - partitions
+
+ - name: make osd partitions on ssd
+ shell: >
+ sgdisk --new={{item.item[1].index}}:0:+{{item.item[1].size}} "--change-name={{ item.item[1].index }}:ceph journal"
+ --typecode={{ item.item[1].index }}:{{ journal_typecode }}
+ --partition-guid={{ item.item[1].index }}:{{ item.stdout }}
+ --mbrtogpt -- {{ item.item[0].device_name }}
+ with_items:
+ - "{{ osds.results }}"
+
+ - name: stop osd(s) service
+ service:
+ name: "ceph-osd@{{ item.item[1].osd_id }}"
+ state: stopped
+ with_items:
+ - "{{ osds.results }}"
+
+ - name: flush osd(s) journal
+ command: ceph-osd -i {{ item.item[1].osd_id }} --flush-journal --cluster {{ cluster }}
+ with_items:
+ - "{{ osds.results }}"
+ when: osds_journal_devices is defined
+
+ - name: update osd(s) journal soft link
+ command: ln -sf /dev/disk/by-partuuid/{{ item.stdout }} /var/lib/ceph/osd/{{ cluster }}-{{ item.item[1].osd_id }}/journal
+ with_items:
+ - "{{ osds.results }}"
+
+ - name: update osd(s) journal uuid
+ command: echo {{ item.stdout }} > /var/lib/ceph/osd/{{ cluster }}-{{ item.item[1].osd_id }}/journal_uuid
+ with_items:
+ - "{{ osds.results }}"
+
+ - name: initialize osd(s) new journal
+ command: ceph-osd -i {{ item.item[1].osd_id }} --mkjournal --cluster {{ cluster }}
+ with_items:
+ - "{{ osds.results }}"
+
+ - name: start osd(s) service
+ service:
+ name: "ceph-osd@{{ item.item[1].osd_id }}"
+ state: started
+ with_items:
+ - "{{ osds.results }}"
--- /dev/null
+---
+# Nukes a multisite config
+- hosts: rgws
+ become: True
+ tasks:
+ - include: roles/ceph-rgw/tasks/multisite/destroy.yml
+
+ handlers:
+ - include: roles/ceph-rgw/handlers/main.yml
+ # Ansible 2.1.0 bug will ignore included handlers without this
+ static: True
--- /dev/null
+---
+# This playbook is used to recover Ceph OSDs after an ssd journal failure.
+# You will also realise that it's really simple to bring your
+# OSDs back to life after replacing your faulty SSD with a new one.
+#
+# You should define the `dev_ssds` variable for the host whose ssds were
+# replaced after the failure.
+#
+# For example in host_vars/hostname1.yml
+#
+# dev_ssds:
+# - device_name: /dev/sdd
+# partitions:
+# - index: 1
+# size: 10G
+# osd_id: 0
+# - index: 2
+# size: 10G
+# osd_id: 1
+# - device_name: /dev/sdf
+# partitions:
+# - index: 1
+# size: 10G
+# osd_id: 2
+#
+# @param device_name: The full device path of the new ssd
+# @param partitions: The custom partition layout of the new ssd
+# @param index: The index of this partition
+# @param size: The size of this partition
+# @param osd_id: Which osd's journal this partition is for.
+#
+# ansible-playbook recover-osds-after-ssd-journal-failure.yml
+# Prompts you to select which host to recover; defaults to null,
+# which selects no host. Enter the hostname of the host whose osds
+# need to be recovered after the ssd journal failure.
+#
+# ansible-playbook -e target_host=hostname \
+#    recover-osds-after-ssd-journal-failure.yml
+# Overrides the prompt using the -e option. Can be used in
+# automation scripts to avoid the interactive prompt.
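+#
+# In short, for each affected OSD the play below: reads the journal_uuid the
+# OSD recorded before the failure, recreates the journal partition on the new
+# SSD with that same partition GUID, stops the OSD, runs 'ceph-osd --mkjournal',
+# and starts the OSD again.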
+
+- hosts: localhost
+ gather_facts: no
+ vars_prompt:
+ - name: target_host
+ prompt: please enter the target hostname on which to recover osds after ssd journal failure
+ private: no
+ tasks:
+ - add_host:
+ name: "{{ target_host }}"
+ groups: dynamically_created_hosts
+
+- hosts: dynamically_created_hosts
+ vars:
+ journal_typecode: 45b0969e-9b03-4f30-b4c6-b4b80ceff106
+ dev_ssds: []
+
+ tasks:
+ - fail: msg="please define dev_ssds variable"
+ when: dev_ssds|length <= 0
+
+ - name: stat osd(s) journal_uuid file
+ stat:
+ path: "/var/lib/ceph/osd/{{ cluster }}-{{ item.1.osd_id }}/journal_uuid"
+ register: osds_dir_stat
+ with_subelements:
+ - "{{ dev_ssds }}"
+ - partitions
+
+ - name: exit playbook, osd(s) are not on this host
+ fail:
+ msg: exit playbook, osd(s) are not on this host
+ with_items:
+ osds_dir_stat.results
+ when:
+ - osds_dir_stat is defined
+ - item.stat.exists == false
+
+ - name: install sgdisk(gdisk)
+ package:
+ name: gdisk
+ state: present
+
+ - name: get osd(s) journal uuid
+ command: cat "/var/lib/ceph/osd/{{ cluster }}-{{ item.1.osd_id }}/journal_uuid"
+ register: osds_uuid
+ with_subelements:
+ - "{{ dev_ssds }}"
+ - partitions
+
+ - name: make partitions on new ssd
+ shell: >
+ sgdisk --new={{item.item[1].index}}:0:+{{item.item[1].size}} "--change-name={{ item.item[1].index }}:ceph journal"
+ --typecode={{ item.item[1].index }}:{{ journal_typecode }}
+ --partition-guid={{ item.item[1].index }}:{{ item.stdout }}
+ --mbrtogpt -- {{ item.item[0].device_name }}
+ with_items:
+ - "{{ osds_uuid.results }}"
+
+ - name: stop osd(s) service
+ service:
+ name: "ceph-osd@{{ item.item[1].osd_id }}"
+ state: stopped
+ with_items:
+ - "{{ osds_uuid.results }}"
+
+ - name: reinitialize osd(s) journal in new ssd
+ command: ceph-osd -i {{ item.item[1].osd_id }} --mkjournal --cluster {{ cluster }}
+ with_items:
+ - "{{ osds_uuid.results }}"
+
+ - name: start osd(s) service
+ service:
+ name: "ceph-osd@{{ item.item[1].osd_id }}"
+ state: started
+ with_items:
+ - "{{ osds_uuid.results }}"