--- /dev/null
--- /dev/null
++---
++# This playbook is meant to upgrade a node from Ubuntu to RHEL.
++# We perform a set of actions prior to rebooting the node.
++# The node then reboots via PXE and gets its new operating system.
++# This playbook only works for monitors and OSDs.
++# Note that some of the checks are ugly,
++# e.g. the repeated 'when: migration_completed.stat.exists == False'.
++# They could be improved with includes; however, I wanted to keep a single file...
++#
++
++# Monitor play: hosts of the 'mons' group are processed one at a time (serial: 1).
++- hosts: mons
++ serial: 1
++ sudo: True
++
++ vars:
++ # Base path under which the monitors-backups directory and archives are placed.
++ backup_dir: /tmp/
++
++ tasks:
++
++ # The marker file is touched by the last task of this play; most of the
++ # following tasks are skipped once it exists.
++ - name: Check if the node has be migrated already
++ stat: >
++ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed
++ register: migration_completed
++ ignore_errors: True
++
++ # An archive left in /var/lib/ceph on a node without the marker aborts the run.
++ - name: Check for failed run
++ stat: >
++ path=/var/lib/ceph/{{ ansible_hostname }}.tar
++ register: mon_archive_leftover
++
++ - fail: msg="Looks like an archive is already there, please remove it!"
++ when: migration_completed.stat.exists == False and mon_archive_leftover.stat.exists == True
++
++ # Compaction happens before the store is archived further down.
++ - name: Compress the store as much as possible
++ command: ceph tell mon.{{ ansible_hostname }} compact
++ when: migration_completed.stat.exists == False
++
++ # Init-system detection: the presence of a 'sysvinit' or 'upstart' file in
++ # the monitor directory selects which service tasks run later
++ # (monsysvinit.stat.exists / monupstart.stat.exists).
++ - name: Check if sysvinit
++ stat: >
++ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit
++ register: monsysvinit
++ changed_when: False
++
++ - name: Check if upstart
++ stat: >
++ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart
++ register: monupstart
++ changed_when: False
++
++ # Health check for sysvinit-managed monitors only; it is skipped on other
++ # hosts and on already-migrated nodes. ignore_errors keeps a non-zero rc
++ # from aborting the play before the explicit 'fail' task below reports it.
++ - name: Check if init does what it is supposed to do (Sysvinit)
++   shell: >
++     ps faux|grep -sq [c]eph-mon && service ceph status mon >> /dev/null
++   register: ceph_status_sysvinit
++   changed_when: False
++   ignore_errors: True
++   when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False
++
++ # rc is tested last: it is absent when the check above was skipped, and
++ # 'and' short-circuits on the earlier conditions.
++ - fail: msg="Something is terribly wrong here, sysvinit is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!"
++   when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False and ceph_status_sysvinit.rc != 0
++
++ # Health check for Upstart-managed monitors only; it is skipped on other
++ # hosts and on already-migrated nodes. ignore_errors keeps a non-zero rc
++ # from aborting the play before the explicit 'fail' task below reports it.
++ - name: Check if init does what it is supposed to do (upstart)
++   shell: >
++     ps faux|grep -sq [c]eph-mon && status ceph-mon-all >> /dev/null
++   register: ceph_status_upstart
++   changed_when: False
++   ignore_errors: True
++   when: monupstart.stat.exists == True and migration_completed.stat.exists == False
++
++ # rc is tested last: it is absent when the check above was skipped, and
++ # 'and' short-circuits on the earlier conditions.
++ - fail: msg="Something is terribly wrong here, upstart is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!"
++   when: monupstart.stat.exists == True and migration_completed.stat.exists == False and ceph_status_upstart.rc != 0
++
++ # Restart after compaction, using whichever init system was detected.
++ - name: Restart the Monitor after compaction (Upstart)
++ service: >
++ name=ceph-mon
++ state=restarted
++ args=id={{ ansible_hostname }}
++ when: monupstart.stat.exists == True and migration_completed.stat.exists == False
++
++ - name: Restart the Monitor after compaction (Sysvinit)
++ service: >
++ name=ceph
++ state=restarted
++ args=mon
++ when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False
++
++ # Probed from the control machine (local_action) against the node's
++ # address on port 6789, for at most 10 seconds.
++ - name: Wait for the monitor to be up again
++ local_action: >
++ wait_for
++ host={{ ansible_ssh_host | default(inventory_hostname) }}
++ port=6789
++ timeout=10
++ when: migration_completed.stat.exists == False
++
++ # The monitor is stopped before its store is archived.
++ - name: Stop the monitor (Upstart)
++ service: >
++ name=ceph-mon
++ state=stopped
++ args=id={{ ansible_hostname }}
++ when: monupstart.stat.exists == True and migration_completed.stat.exists == False
++
++ - name: Stop the monitor (Sysvinit)
++ service: >
++ name=ceph
++ state=stopped
++ args=mon
++ when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False
++
++ # Same probe as above, but waiting for port 6789 to stop accepting connections.
++ - name: Wait for the monitor to be down
++ local_action: >
++ wait_for
++ host={{ ansible_ssh_host | default(inventory_hostname) }}
++ port=6789
++ timeout=10
++ state=stopped
++ when: migration_completed.stat.exists == False
++
++ # Created on the first host of the 'backup' group. A directory needs the
++ # execute (search) bit to be entered, hence 0755 rather than 0644.
++ # NOTE(review): the archive is later retrieved with 'fetch'; confirm that
++ # groups.backup[0] is the machine the fetched archive ends up on.
++ - name: Create a backup directory
++   file: >
++     path={{ backup_dir }}/monitors-backups
++     state=directory
++     owner=root
++     group=root
++     mode=0755
++   delegate_to: "{{ item }}"
++   with_items: groups.backup[0]
++   when: migration_completed.stat.exists == False
++
++ # NOTE (leseb): should we convert upstart to sysvinit here already?
++ # Archives /var/lib/ceph (the chdir target) plus /etc/ceph/*. The archive
++ # is written into the directory being archived, so it is excluded
++ # explicitly to keep tar from packing its own, still growing, output.
++ - name: Archive monitor stores
++   shell: >
++     tar -cpvzf - --one-file-system --exclude=./{{ ansible_hostname }}.tar . /etc/ceph/* | cat > {{ ansible_hostname }}.tar
++     chdir=/var/lib/ceph/
++     creates={{ ansible_hostname }}.tar
++   when: migration_completed.stat.exists == False
++
++ # Retrieves the archive from the node; flat=yes stores it at exactly 'dest'.
++ - name: Scp the Monitor store
++ fetch: >
++ src=/var/lib/ceph/{{ ansible_hostname }}.tar
++ dest={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar
++ flat=yes
++ when: migration_completed.stat.exists == False
++
++ # Per the file header, the node reboots via PXE and gets its new OS here.
++ # NOTE(review): the reboot may cut the SSH session under this task -
++ # confirm the task does not report a failure when that happens.
++ - name: Reboot the server
++ command: reboot
++ when: migration_completed.stat.exists == False
++
++ # wait_for runs on the control machine (local_action) and its 'host'
++ # option defaults to 127.0.0.1, so the rebooted node is named explicitly,
++ # the same way the other wait_for tasks of this play do.
++ - name: Wait for the server to come up
++   local_action: >
++     wait_for
++     host={{ ansible_ssh_host | default(inventory_hostname) }}
++     port=22
++     delay=10
++     timeout=3600
++   when: migration_completed.stat.exists == False
++
++ # Fixed 20 second grace period after port 22 answers.
++ - name: Wait a bit more to be sure that the server is ready
++ pause: seconds=20
++ when: migration_completed.stat.exists == False
++
++ # The init-system markers are looked up again after the reboot; the
++ # results overwrite monsysvinit / monupstart registered earlier.
++ - name: Check if sysvinit
++ stat: >
++ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit
++ register: monsysvinit
++ changed_when: False
++
++ - name: Check if upstart
++ stat: >
++ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart
++ register: monupstart
++ changed_when: False
++
++ # The monitor must not be running while its store is restored below.
++ - name: Make sure the monitor is stopped (Upstart)
++ service: >
++ name=ceph-mon
++ state=stopped
++ args=id={{ ansible_hostname }}
++ when: monupstart.stat.exists == True and migration_completed.stat.exists == False
++
++ - name: Make sure the monitor is stopped (Sysvinit)
++ service: >
++ name=ceph
++ state=stopped
++ args=mon
++ when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False
++
++ # NOTE (leseb): 'creates' was added in Ansible 1.6
++ # 'creates' is given as an absolute path: the archive holds the config
++ # files as etc/ceph/... (see the 'cp etc/ceph/*' task that follows) and
++ # they are extracted below dest, so the file to look for lives under
++ # /var/lib/ceph/. A relative path is not resolved against dest.
++ - name: Copy and unarchive the monitor store
++   unarchive: >
++     src={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar
++     dest=/var/lib/ceph/
++     copy=yes
++     mode=0600
++     creates=/var/lib/ceph/etc/ceph/ceph.conf
++   when: migration_completed.stat.exists == False
++
++ # Copies the extracted etc/ceph files (relative to /var/lib/ceph/) to /etc/ceph/.
++ - name: Copy keys and configs
++ shell: >
++ cp etc/ceph/* /etc/ceph/
++ chdir=/var/lib/ceph/
++ when: migration_completed.stat.exists == False
++
++ # For every matching directory directly under /var/lib/ceph/mon/: create
++ # the 'sysvinit' marker and remove the 'upstart' one.
++ - name: Configure RHEL7 for sysvinit
++ shell: find -L /var/lib/ceph/mon/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \;
++ when: migration_completed.stat.exists == False
++
++ # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary
++ # so we directly call sysvinit
++ - name: Start the monitor
++ service: >
++ name=ceph
++ state=started
++ args=mon
++ when: migration_completed.stat.exists == False
++
++ # Probed from the control machine against port 6789, for at most 10 seconds.
++ - name: Wait for the Monitor to be up again
++ local_action: >
++ wait_for
++ host={{ ansible_ssh_host | default(inventory_hostname) }}
++ port=6789
++ timeout=10
++ when: migration_completed.stat.exists == False
++
++ # Retried up to 5 times, 10 seconds apart, until the hostname shows up
++ # after 'quorum' in the monmap line of 'ceph -s'. Runs on groups.backup[0].
++ # NOTE(review): presumably that host has the ceph CLI and an admin key - confirm.
++ - name: Waiting for the monitor to join the quorum...
++ shell: >
++ ceph -s | grep monmap | sed 's/.*quorum//' | egrep -q {{ ansible_hostname }}
++ register: result
++ until: result.rc == 0
++ retries: 5
++ delay: 10
++ delegate_to: "{{ item }}"
++ with_items: groups.backup[0]
++ when: migration_completed.stat.exists == False
++
++ # Touches the marker file that the first task of this play checks.
++ - name: Done moving to the next monitor
++ file: >
++ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed
++ state=touch
++ owner=root
++ group=root
++ mode=0600
++ when: migration_completed.stat.exists == False
++
++# OSD play: hosts of the 'osds' group are processed one at a time (serial: 1).
++- hosts: osds
++ serial: 1
++ sudo: True
++
++ vars:
++ # Base path under which the osds-backups directory and archives are placed.
++ backup_dir: /tmp/
++
++ tasks:
++ # The marker file is touched near the end of this play; most of the
++ # following tasks are skipped once it exists.
++ - name: Check if the node has be migrated already
++ stat: >
++ path=/var/lib/ceph/migration_completed
++ register: migration_completed
++ ignore_errors: True
++
++ # An archive left in /var/lib/ceph on a node without the marker aborts the run.
++ - name: Check for failed run
++ stat: >
++ path=/var/lib/ceph/{{ ansible_hostname }}.tar
++ register: osd_archive_leftover
++
++ - fail: msg="Looks like an archive is already there, please remove it!"
++ when: migration_completed.stat.exists == False and osd_archive_leftover.stat.exists == True
++
++ # Init-system detection comes first so the health checks below can be
++ # limited to the init system that manages the OSDs on this host. A
++ # matching marker file gives rc == 0; ignore_errors keeps a missing
++ # marker from failing the play.
++ - name: Check if sysvinit
++   shell: stat /var/lib/ceph/osd/ceph-*/sysvinit
++   register: osdsysvinit
++   ignore_errors: True
++   changed_when: False
++
++ - name: Check if upstart
++   shell: stat /var/lib/ceph/osd/ceph-*/upstart
++   register: osdupstart
++   ignore_errors: True
++   changed_when: False
++
++ # Health check for sysvinit-managed OSDs only. ignore_errors keeps a
++ # non-zero rc from aborting the play before the 'fail' task reports it.
++ - name: Check if init does what it is supposed to do (Sysvinit)
++   shell: >
++     ps faux|grep -sq [c]eph-osd && service ceph status osd >> /dev/null
++   register: ceph_status_sysvinit
++   changed_when: False
++   ignore_errors: True
++   when: osdsysvinit.rc == 0 and migration_completed.stat.exists == False
++
++ # The OSD markers registered above are used here; the monitor variables
++ # (monsysvinit / monupstart) are not registered in this play. rc is tested
++ # last because it is absent when the check above was skipped.
++ - fail: msg="Something is terribly wrong here, sysvinit is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!"
++   when: osdsysvinit.rc == 0 and migration_completed.stat.exists == False and ceph_status_sysvinit.rc != 0
++
++ # Health check for Upstart-managed OSDs only.
++ # NOTE(review): the pattern expects a single-character OSD id and a pid of
++ # at least four digits - confirm that holds on the target hosts.
++ - name: Check if init does what it is supposed to do (upstart)
++   shell: >
++     ps faux|grep -sq [c]eph-osd && initctl list|egrep -sq "ceph-osd \(ceph/.\) start/running, process [0-9][0-9][0-9][0-9]"
++   register: ceph_status_upstart
++   changed_when: False
++   ignore_errors: True
++   when: osdupstart.rc == 0 and migration_completed.stat.exists == False
++
++ - fail: msg="Something is terribly wrong here, upstart is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!"
++   when: osdupstart.rc == 0 and migration_completed.stat.exists == False and ceph_status_upstart.rc != 0
++
++ # Set from the first monitor; unset again by the last task of this play.
++ - name: Set the noout flag
++   command: ceph osd set noout
++   delegate_to: "{{ item }}"
++   with_items: groups.mons[0]
++   when: migration_completed.stat.exists == False
++
++ # Archives /var/lib/ceph (the chdir target) plus /etc/ceph/ceph.conf. The
++ # archive is written into the directory being archived, so it is excluded
++ # explicitly to keep tar from packing its own, still growing, output.
++ - name: Archive ceph configs
++   shell: >
++     tar -cpvzf - --one-file-system --exclude=./{{ ansible_hostname }}.tar . /etc/ceph/ceph.conf | cat > {{ ansible_hostname }}.tar
++     chdir=/var/lib/ceph/
++     creates={{ ansible_hostname }}.tar
++   when: migration_completed.stat.exists == False
++
++ # Created on the first host of the 'backup' group. A directory needs the
++ # execute (search) bit to be entered, hence 0755 rather than 0644.
++ # NOTE(review): the archive is later retrieved with 'fetch'; confirm that
++ # groups.backup[0] is the machine the fetched archive ends up on.
++ - name: Create backup directory
++   file: >
++     path={{ backup_dir }}/osds-backups
++     state=directory
++     owner=root
++     group=root
++     mode=0755
++   delegate_to: "{{ item }}"
++   with_items: groups.backup[0]
++   when: migration_completed.stat.exists == False
++
++ # Retrieves the archive from the node into the osds-backups directory.
++ - name: Scp OSDs dirs and configs
++ fetch: >
++ src=/var/lib/ceph/{{ ansible_hostname }}.tar
++ dest={{ backup_dir }}/osds-backups/
++ flat=yes
++ when: migration_completed.stat.exists == False
++
++ # Records the TCP ports the ceph-osd processes listen on, while they are
++ # still running; the list feeds the 'Wait for the OSDs to be down' task.
++ - name: Collect OSD ports
++ shell: netstat -tlpn | awk -F ":" '/ceph-osd/ { sub (" .*", "", $2); print $2 }' | uniq
++ register: osd_ports
++ when: migration_completed.stat.exists == False
++
++ # Runs only when the Upstart marker lookup succeeded (osdupstart.rc == 0).
++ - name: Gracefully stop the OSDs (Upstart)
++ service: >
++ name=ceph-osd-all
++ state=stopped
++ when: osdupstart.rc == 0 and migration_completed.stat.exists == False
++
++ # Runs only when the sysvinit marker lookup succeeded (osdsysvinit.rc == 0).
++ # The daemon type passed to the init script is 'osd': this play handles
++ # OSD hosts, and 'mon' would address the monitor daemons instead.
++ - name: Gracefully stop the OSDs (Sysvinit)
++   service: >
++     name=ceph
++     state=stopped
++     args=osd
++   when: osdsysvinit.rc == 0 and migration_completed.stat.exists == False
++
++ # Probes, from the control machine, every port recorded by 'Collect OSD
++ # ports' until it stops accepting connections (10 seconds each at most).
++ # default([]) covers the case where the collection task was skipped and
++ # osd_ports therefore carries no stdout_lines.
++ - name: Wait for the OSDs to be down
++   local_action: >
++     wait_for
++     host={{ ansible_ssh_host | default(inventory_hostname) }}
++     port={{ item }}
++     timeout=10
++     state=stopped
++   with_items:
++     - "{{ osd_ports.stdout_lines | default([]) }}"
++   when: migration_completed.stat.exists == False
++
++ # For every matching directory directly under /var/lib/ceph/osd/: create
++ # the 'sysvinit' marker and remove the 'upstart' one. This is done before
++ # the reboot.
++ # NOTE(review): presumably the OSD data directories survive the
++ # reinstall, so the markers persist - confirm.
++ - name: Configure RHEL with sysvinit
++ shell: find -L /var/lib/ceph/osd/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \;
++ when: migration_completed.stat.exists == False
++
++ # Per the file header, the node reboots via PXE and gets its new OS here.
++ # NOTE(review): the reboot may cut the SSH session under this task -
++ # confirm the task does not report a failure when that happens.
++ - name: Reboot the server
++ command: reboot
++ when: migration_completed.stat.exists == False
++
++ # wait_for runs on the control machine (local_action) and its 'host'
++ # option defaults to 127.0.0.1, so the rebooted node is named explicitly,
++ # the same way the 'Wait for the OSDs to be down' task does.
++ - name: Wait for the server to come up
++   local_action: >
++     wait_for
++     host={{ ansible_ssh_host | default(inventory_hostname) }}
++     port=22
++     delay=10
++     timeout=3600
++   when: migration_completed.stat.exists == False
++
++ # Fixed 20 second grace period after port 22 answers, before the archive
++ # is copied back.
++ - name: Wait a bit to be sure that the server is ready for scp
++ pause: seconds=20
++ when: migration_completed.stat.exists == False
++
++ # NOTE (leseb): 'creates' was added in Ansible 1.6
++ # 'creates' is given as an absolute path: the archive holds the config
++ # file as etc/ceph/ceph.conf (see the 'cp etc/ceph/*' task that follows)
++ # and it is extracted below dest, so the file to look for lives under
++ # /var/lib/ceph/. A relative path is not resolved against dest.
++ - name: Copy and unarchive the OSD configs
++   unarchive: >
++     src={{ backup_dir }}/osds-backups/{{ ansible_hostname }}.tar
++     dest=/var/lib/ceph/
++     copy=yes
++     mode=0600
++     creates=/var/lib/ceph/etc/ceph/ceph.conf
++   when: migration_completed.stat.exists == False
++
++ # Copies the extracted etc/ceph files (relative to /var/lib/ceph/) to /etc/ceph/.
++ - name: Copy keys and configs
++ shell: >
++ cp etc/ceph/* /etc/ceph/
++ chdir=/var/lib/ceph/
++ when: migration_completed.stat.exists == False
++
++ # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary
++ # so we directly call sysvinit
++ # The sysvinit service is 'ceph' with the daemon type as argument, as in
++ # the other sysvinit tasks of this file; 'ceph-osd-all' is the name used
++ # by the Upstart tasks.
++ - name: Start all the OSDs
++   service: >
++     name=ceph
++     state=started
++     args=osd
++   when: migration_completed.stat.exists == False
++
++ # NOTE (leseb): this is tricky: unless they are set in ceph.conf, the
++ # listening ports cannot be predicted, so they will change after each restart
++# - name: Wait for the OSDs to be up again
++# local_action: >
++# wait_for
++# host={{ ansible_ssh_host | default(inventory_hostname) }}
++# port={{ item }}
++# timeout=30
++# with_items:
++# - "{{ osd_ports.stdout_lines }}"
++
++ # Retried up to 10 times, 10 seconds apart. Succeeds once the number of
++ # active+clean PGs reported by 'ceph pg stat' equals the total PG count
++ # and 'ceph health' reports HEALTH_OK or HEALTH_WARN. Runs on groups.backup[0].
++ # NOTE(review): presumably that host has the ceph CLI and an admin key - confirm.
++ - name: Waiting for clean PGs...
++ shell: >
++ test "$(ceph pg stat | sed 's/^.*pgs://' | sed 's/active+clean.*//' |sed 's/ //')" -eq "$(ceph pg stat | sed 's/pgs.*//' | sed 's/^.*://' | sed 's/ //')" && ceph health | egrep -q "HEALTH_OK|HEALTH_WARN"
++ register: result
++ until: result.rc == 0
++ retries: 10
++ delay: 10
++ delegate_to: "{{ item }}"
++ with_items: groups.backup[0]
++ when: migration_completed.stat.exists == False
++
++ # Touches the marker file that the first task of this play checks.
++ - name: Done moving to the next OSD
++ file: >
++ path=/var/lib/ceph/migration_completed
++ state=touch
++ owner=root
++ group=root
++ mode=0600
++ when: migration_completed.stat.exists == False
++
++ # Clears the flag set by 'Set the noout flag'. The condition uses the
++ # value registered at the start of the play, not the marker just touched.
++ - name: Unset the noout flag
++ command: ceph osd unset noout
++ delegate_to: "{{ item }}"
++ with_items: groups.mons[0]
++ when: migration_completed.stat.exists == False
--- /dev/null
-- hosts:
- - mons
- - osds
- - mdss
- - rgws
- sudo: True
- roles:
- - ceph-common
-
+ ---
+ # This playbook does a rolling update for all the Ceph services
+ # Change the value of serial: to adjust the number of servers to be updated at once.
+ #
+ # The roles that apply to the ceph hosts will be applied: ceph-common,
+ # ceph-mon, ceph-osd, ceph-mds and ceph-radosgw. So any changes to configuration, package updates, etc,
+ # will be applied as part of the rolling update process.
+ #
+
+ # /!\ DO NOT FORGET TO CHANGE THE RELEASE VERSION FIRST! /!\
+
- - ceph-mon
++# Rolling update of the monitors, one host at a time (serial: 1).
+ - hosts: mons
+ serial: 1
+ sudo: True
++
++ pre_tasks:
++ # Compaction runs before the roles are applied.
++ - name: Compress the store as much as possible
++ command: ceph tell mon.{{ ansible_hostname }} compact
++
+ roles:
- - name: restart monitor(s)
- service: >
- name=ceph
- state=restarted
- args=mon
++ - ceph-common
++ - ceph-mon
++
+ post_tasks:
- - ceph-osd
++ # Init-system detection: the presence of a 'sysvinit' or 'upstart' file in
++ # the monitor directory selects the restart task below.
++ - name: Check if sysvinit
++ stat: >
++ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit
++ register: monsysvinit
++
++ - name: Check if upstart
++ stat: >
++ path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart
++ register: monupstart
++
++ - name: Restart the monitor after compaction (Upstart)
++ service: >
++ name=ceph-mon
++ state=restarted
++ args=id={{ ansible_hostname }}
++ when: monupstart.stat.exists == True
++
++ - name: Restart the monitor after compaction (Sysvinit)
++ service: >
++ name=ceph
++ state=restarted
++ args=mon
++ when: monsysvinit.stat.exists == True
++
++ # NOTE(review): this restart is unconditional and repeats the sysvinit
++ # restart above; it looks like a leftover of the task removed from the
++ # roles section - confirm whether a second restart is intended.
++ - name: restart monitor(s)
++ service: >
++ name=ceph
++ state=restarted
++ args=mon
++
++ # Retried up to 5 times, 10 seconds apart, until the hostname shows up
++ # after 'quorum' in the monmap line of 'ceph -s'. Delegated to 127.0.0.1.
++ # NOTE(review): presumably the control machine has the ceph CLI and an
++ # admin key - confirm.
++ - name: Waiting for the monitor to join the quorum...
++ shell: >
++ ceph -s | grep monmap | sed 's/.*quorum//' | egrep -q {{ ansible_hostname }}
++ register: result
++ until: result.rc == 0
++ retries: 5
++ delay: 10
++ delegate_to: 127.0.0.1
++
+
++# Rolling update of the OSD hosts, one host at a time (serial: 1).
+ - hosts: osds
+ serial: 1
+ sudo: True
++
++ pre_tasks:
++ # Set from the first monitor; unset again by the last post_task.
++ - name: Set the noout flag
++ command: ceph osd set noout
++ delegate_to: "{{ item }}"
++ with_items: groups.mons[0]
++
+ roles:
- - name: restart object storage daemon(s)
- command: service ceph-osd-all restart
- when: ansible_distribution == "Ubuntu"
- - name: restart object storage daemon(s)
- service: name=ceph state=restarted args=osd
- when: ansible_distribution == "Debian"
++ - ceph-common
++ - ceph-osd
++
+ post_tasks:
- - ceph-mds
++ # Init-system detection: a matching marker file gives rc == 0;
++ # ignore_errors keeps a missing marker from failing the play.
++ - name: Check if sysvinit
++ shell: stat /var/lib/ceph/osd/ceph-*/sysvinit
++ register: osdsysvinit
++ ignore_errors: True
++
++ - name: Check if upstart
++ shell: stat /var/lib/ceph/osd/ceph-*/upstart
++ register: osdupstart
++ ignore_errors: True
++
++ # Despite its name, this task restarts the OSDs (state=restarted).
++ - name: Gracefully stop the OSDs (Upstart)
++ service: >
++ name=ceph-osd-all
++ state=restarted
++ when: osdupstart.rc == 0
++
++ # Despite its name, this task restarts the OSDs (state=restarted). It runs
++ # only when the sysvinit marker lookup succeeded (osdsysvinit.rc == 0).
++ # The daemon type is 'osd', since this play handles OSD hosts, and the
++ # condition no longer ends in a dangling 'and'.
++ - name: Gracefully stop the OSDs (Sysvinit)
++   service: >
++     name=ceph
++     state=restarted
++     args=osd
++   when: osdsysvinit.rc == 0
++
++ # Retried up to 10 times, 10 seconds apart. Succeeds once the number of
++ # active+clean PGs reported by 'ceph pg stat' equals the total PG count
++ # and 'ceph health' reports HEALTH_OK or HEALTH_WARN. Delegated to 127.0.0.1.
++ # NOTE(review): presumably the control machine has the ceph CLI and an
++ # admin key - confirm.
++ - name: Waiting for clean PGs...
++ shell: >
++ test "$(ceph pg stat | sed 's/^.*pgs://' | sed 's/active+clean.*//' |sed 's/ //')" -eq "$(ceph pg stat | sed 's/pgs.*//' | sed 's/^.*://' | sed 's/ //')" && ceph health | egrep -q "HEALTH_OK|HEALTH_WARN"
++ register: result
++ until: result.rc == 0
++ retries: 10
++ delay: 10
++ delegate_to: 127.0.0.1
++
++ # Clears the flag set in pre_tasks, from the first monitor.
++ - name: Unset the noout flag
++ command: ceph osd unset noout
++ delegate_to: "{{ item }}"
++ with_items: groups.mons[0]
++
+
++# Rolling update of the metadata servers, one host at a time (serial: 1).
+ - hosts: mdss
+ serial: 1
+ sudo: True
++
+ roles:
- - name: restart metadata server(s)
- service: >
- name=ceph
- state=restarted
- args=mds
++ - ceph-common
++ - ceph-mds
++
++ # The restart now happens in post_tasks, after the roles were applied.
+ post_tasks:
++ # Init-system detection for the metadata server: the markers are looked up
++ # in the MDS data directory (/var/lib/ceph/mds/), not the monitor one,
++ # since hosts of the 'mdss' group need not run a monitor.
++ - name: Check if sysvinit
++   stat: >
++     path=/var/lib/ceph/mds/ceph-{{ ansible_hostname }}/sysvinit
++   register: mdssysvinit
++
++ - name: Check if upstart
++   stat: >
++     path=/var/lib/ceph/mds/ceph-{{ ansible_hostname }}/upstart
++   register: mdsupstart
++
++ # Restart through whichever init system the lookups registered as
++ # mdsupstart / mdssysvinit found.
++ - name: Restart the metadata server (Upstart)
++ service: >
++ name=ceph-mds
++ state=restarted
++ args=id={{ ansible_hostname }}
++ when: mdsupstart.stat.exists == True
++
++ - name: Restart the metadata server (Sysvinit)
++ service: >
++ name=ceph
++ state=restarted
++ args=mds
++ when: mdssysvinit.stat.exists == True
++
++
++# Rolling update of the RADOS gateways, one host at a time (serial: 1).
++- hosts: rgws
++ serial: 1
++ sudo: True
++
++ roles:
++ - ceph-common
++ - ceph-radosgw
++
++ post_tasks:
++ # Which services get restarted depends on radosgw_frontend.
++ # NOTE(review): radosgw_frontend is not defined in this file; presumably
++ # it comes from the ceph-radosgw role or group vars - confirm.
++ - name: restart rados gateway server(s)
++ service: >
++ name={{ item }}
++ state=restarted
++ with_items:
++ - radosgw
++ when: radosgw_frontend == 'civetweb'
++
++ # With the apache frontend, apache2 is restarted as well as radosgw.
++ - name: restart rados gateway server(s)
++ service: >
++ name={{ item }}
++ state=restarted
++ with_items:
++ - apache2
++ - radosgw
++ when: radosgw_frontend == 'apache'