From: Sébastien Han Date: Wed, 17 Aug 2016 09:48:42 +0000 (+0200) Subject: create a directory for infrastructure playbooks X-Git-Tag: v1.0.6~21^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F939%2Fhead;p=ceph-ansible.git create a directory for infrastructure playbooks Since we have a couple of infrastructure related playbooks (in addition to the roles we are using to deploy Ceph), it makes sense to have them located in a separate directory. Signed-off-by: Sébastien Han --- diff --git a/cluster-maintenance.yml b/cluster-maintenance.yml deleted file mode 100644 index c559ed62f..000000000 --- a/cluster-maintenance.yml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# This playbook was made to automate Ceph servers maintenance -# Typical use case: hardware change -# By running this playbook you will set the 'noout' flag on your -# cluster, which means that OSD **can't** be marked as out -# of the CRUSH map, but they will be marked as down. -# Basically we tell the cluster to don't move any data since -# the operation won't last for too long. - -- hosts: - gather_facts: False - - tasks: - - - name: Set the noout flag - command: ceph osd set noout - delegate_to: - - - name: Turn off the server - command: poweroff - - - name: Wait for the server to go down - local_action: > - wait_for host= - port=22 - state=stopped - - - name: Wait for the server to come up - local_action: > - wait_for host= - port=22 - delay=10 - timeout=3600 - - - name: Unset the noout flag - command: ceph osd unset noout - delegate_to: diff --git a/cluster-os-migration.yml b/cluster-os-migration.yml deleted file mode 100644 index 9053dfd91..000000000 --- a/cluster-os-migration.yml +++ /dev/null @@ -1,557 +0,0 @@ ---- -# This playbook was meant to upgrade a node from Ubuntu to RHEL. -# We are performing a set of actions prior to reboot the node. -# The node reboots via PXE and gets its new operating system. -# This playbook only works for monitors and OSDs.
-# Note that some of the checks are ugly: -# ie: the when migration_completed.stat.exists -# can be improved with includes, however I wanted to keep a single file... -# - -- hosts: mons - serial: 1 - sudo: True - - vars: - backup_dir: /tmp/ - - tasks: - - - name: Check if the node has be migrated already - stat: > - path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed - register: migration_completed - failed_when: false - - - name: Check for failed run - stat: > - path=/var/lib/ceph/{{ ansible_hostname }}.tar - register: mon_archive_leftover - - - fail: msg="Looks like an archive is already there, please remove it!" - when: migration_completed.stat.exists == False and mon_archive_leftover.stat.exists == True - - - name: Compress the store as much as possible - command: ceph tell mon.{{ ansible_hostname }} compact - when: migration_completed.stat.exists == False - - - name: Check if sysvinit - stat: > - path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit - register: monsysvinit - changed_when: False - - - name: Check if upstart - stat: > - path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart - register: monupstart - changed_when: False - - - name: Check if init does what it is supposed to do (Sysvinit) - shell: > - ps faux|grep -sq [c]eph-mon && service ceph status mon >> /dev/null - register: ceph_status_sysvinit - changed_when: False - - # can't complete the condition since the previous taks never ran... - - fail: msg="Something is terribly wrong here, sysvinit is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!" 
- when: ceph_status_sysvinit.rc != 0 and migration_completed.stat.exists == False and monsysvinit.stat.exists == True - - - name: Check if init does what it is supposed to do (upstart) - shell: > - ps faux|grep -sq [c]eph-mon && status ceph-mon-all >> /dev/null - register: ceph_status_upstart - changed_when: False - - - fail: msg="Something is terribly wrong here, upstart is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!" - when: ceph_status_upstart.rc != 0 and migration_completed.stat.exists == False and monupstart.stat.exists == True - - - name: Restart the Monitor after compaction (Upstart) - service: > - name=ceph-mon - state=restarted - args=id={{ ansible_hostname }} - when: monupstart.stat.exists == True and migration_completed.stat.exists == False - - - name: Restart the Monitor after compaction (Sysvinit) - service: > - name=ceph - state=restarted - args=mon - when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False - - - name: Wait for the monitor to be up again - local_action: > - wait_for - host={{ ansible_ssh_host | default(inventory_hostname) }} - port=6789 - timeout=10 - when: migration_completed.stat.exists == False - - - name: Stop the monitor (Upstart) - service: > - name=ceph-mon - state=stopped - args=id={{ ansible_hostname }} - when: monupstart.stat.exists == True and migration_completed.stat.exists == False - - - name: Stop the monitor (Sysvinit) - service: > - name=ceph - state=stopped - args=mon - when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False - - - name: Wait for the monitor to be down - local_action: > - wait_for - host={{ ansible_ssh_host | default(inventory_hostname) }} - port=6789 - timeout=10 - state=stopped - when: migration_completed.stat.exists == False - - - name: Create a backup directory - file: > - path={{ backup_dir }}/monitors-backups - state=directory - owner=root - group=root - mode=0644 - delegate_to: "{{ item }}" - 
with_items: groups.backup[0] - when: migration_completed.stat.exists == False - - # NOTE (leseb): should we convert upstart to sysvinit here already? - - name: Archive monitor stores - shell: > - tar -cpvzf - --one-file-system . /etc/ceph/* | cat > {{ ansible_hostname }}.tar - chdir=/var/lib/ceph/ - creates={{ ansible_hostname }}.tar - when: migration_completed.stat.exists == False - - - name: Scp the Monitor store - fetch: > - src=/var/lib/ceph/{{ ansible_hostname }}.tar - dest={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar - flat=yes - when: migration_completed.stat.exists == False - - - name: Reboot the server - command: reboot - when: migration_completed.stat.exists == False - - - name: Wait for the server to come up - local_action: > - wait_for - port=22 - delay=10 - timeout=3600 - when: migration_completed.stat.exists == False - - - name: Wait a bit more to be sure that the server is ready - pause: seconds=20 - when: migration_completed.stat.exists == False - - - name: Check if sysvinit - stat: > - path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit - register: monsysvinit - changed_when: False - - - name: Check if upstart - stat: > - path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart - register: monupstart - changed_when: False - - - name: Make sure the monitor is stopped (Upstart) - service: > - name=ceph-mon - state=stopped - args=id={{ ansible_hostname }} - when: monupstart.stat.exists == True and migration_completed.stat.exists == False - - - name: Make sure the monitor is stopped (Sysvinit) - service: > - name=ceph - state=stopped - args=mon - when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False - - # NOTE (leseb): 'creates' was added in Ansible 1.6 - - name: Copy and unarchive the monitor store - unarchive: > - src={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar - dest=/var/lib/ceph/ - copy=yes - mode=0600 - creates=etc/ceph/ceph.conf - when: migration_completed.stat.exists == 
False - - - name: Copy keys and configs - shell: > - cp etc/ceph/* /etc/ceph/ - chdir=/var/lib/ceph/ - when: migration_completed.stat.exists == False - - - name: Configure RHEL7 for sysvinit - shell: find -L /var/lib/ceph/mon/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \; - when: migration_completed.stat.exists == False - - # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary - # so we directly call sysvinit - - name: Start the monitor - service: > - name=ceph - state=started - args=mon - when: migration_completed.stat.exists == False - - - name: Wait for the Monitor to be up again - local_action: > - wait_for - host={{ ansible_ssh_host | default(inventory_hostname) }} - port=6789 - timeout=10 - when: migration_completed.stat.exists == False - - - name: Waiting for the monitor to join the quorum... - shell: > - ceph -s | grep monmap | sed 's/.*quorum//' | egrep -q {{ ansible_hostname }} - register: result - until: result.rc == 0 - retries: 5 - delay: 10 - delegate_to: "{{ item }}" - with_items: groups.backup[0] - when: migration_completed.stat.exists == False - - - name: Done moving to the next monitor - file: > - path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed - state=touch - owner=root - group=root - mode=0600 - when: migration_completed.stat.exists == False - -- hosts: osds - serial: 1 - sudo: True - - vars: - backup_dir: /tmp/ - - tasks: - - name: Check if the node has be migrated already - stat: > - path=/var/lib/ceph/migration_completed - register: migration_completed - failed_when: false - - - name: Check for failed run - stat: > - path=/var/lib/ceph/{{ ansible_hostname }}.tar - register: osd_archive_leftover - - - fail: msg="Looks like an archive is already there, please remove it!" 
- when: migration_completed.stat.exists == False and osd_archive_leftover.stat.exists == True - - - name: Check if init does what it is supposed to do (Sysvinit) - shell: > - ps faux|grep -sq [c]eph-osd && service ceph status osd >> /dev/null - register: ceph_status_sysvinit - changed_when: False - - # can't complete the condition since the previous taks never ran... - - fail: msg="Something is terribly wrong here, sysvinit is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!" - when: ceph_status_sysvinit.rc != 0 and migration_completed.stat.exists == False and monsysvinit.stat.exists == True - - - name: Check if init does what it is supposed to do (upstart) - shell: > - ps faux|grep -sq [c]eph-osd && initctl list|egrep -sq "ceph-osd \(ceph/.\) start/running, process [0-9][0-9][0-9][0-9]" - register: ceph_status_upstart - changed_when: False - - - fail: msg="Something is terribly wrong here, upstart is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!" - when: ceph_status_upstart.rc != 0 and migration_completed.stat.exists == False and monupstart.stat.exists == True - - - name: Set the noout flag - command: ceph osd set noout - delegate_to: "{{ item }}" - with_items: groups.mons[0] - when: migration_completed.stat.exists == False - - - name: Check if sysvinit - shell: stat /var/lib/ceph/osd/ceph-*/sysvinit - register: osdsysvinit - failed_when: false - changed_when: False - - - name: Check if upstart - shell: stat /var/lib/ceph/osd/ceph-*/upstart - register: osdupstart - failed_when: false - changed_when: False - - - name: Archive ceph configs - shell: > - tar -cpvzf - --one-file-system . 
/etc/ceph/ceph.conf | cat > {{ ansible_hostname }}.tar - chdir=/var/lib/ceph/ - creates={{ ansible_hostname }}.tar - when: migration_completed.stat.exists == False - - - name: Create backup directory - file: > - path={{ backup_dir }}/osds-backups - state=directory - owner=root - group=root - mode=0644 - delegate_to: "{{ item }}" - with_items: groups.backup[0] - when: migration_completed.stat.exists == False - - - name: Scp OSDs dirs and configs - fetch: > - src=/var/lib/ceph/{{ ansible_hostname }}.tar - dest={{ backup_dir }}/osds-backups/ - flat=yes - when: migration_completed.stat.exists == False - - - name: Collect OSD ports - shell: netstat -tlpn | awk -F ":" '/ceph-osd/ { sub (" .*", "", $2); print $2 }' | uniq - register: osd_ports - when: migration_completed.stat.exists == False - - - name: Gracefully stop the OSDs (Upstart) - service: > - name=ceph-osd-all - state=stopped - when: osdupstart.rc == 0 and migration_completed.stat.exists == False - - - name: Gracefully stop the OSDs (Sysvinit) - service: > - name=ceph - state=stopped - args=mon - when: osdsysvinit.rc == 0 and migration_completed.stat.exists == False - - - name: Wait for the OSDs to be down - local_action: > - wait_for - host={{ ansible_ssh_host | default(inventory_hostname) }} - port={{ item }} - timeout=10 - state=stopped - with_items: - - "{{ osd_ports.stdout_lines }}" - when: migration_completed.stat.exists == False - - - name: Configure RHEL with sysvinit - shell: find -L /var/lib/ceph/osd/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \; - when: migration_completed.stat.exists == False - - - name: Reboot the server - command: reboot - when: migration_completed.stat.exists == False - - - name: Wait for the server to come up - local_action: > - wait_for - port=22 - delay=10 - timeout=3600 - when: migration_completed.stat.exists == False - - - name: Wait a bit to be sure that the server is ready for scp - 
pause: seconds=20 - when: migration_completed.stat.exists == False - - # NOTE (leseb): 'creates' was added in Ansible 1.6 - - name: Copy and unarchive the OSD configs - unarchive: > - src={{ backup_dir }}/osds-backups/{{ ansible_hostname }}.tar - dest=/var/lib/ceph/ - copy=yes - mode=0600 - creates=etc/ceph/ceph.conf - when: migration_completed.stat.exists == False - - - name: Copy keys and configs - shell: > - cp etc/ceph/* /etc/ceph/ - chdir=/var/lib/ceph/ - when: migration_completed.stat.exists == False - - # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary - # so we directly call sysvinit - - name: Start all the OSDs - service: > - name=ceph-osd-all - state=started - args=osd - when: migration_completed.stat.exists == False - - # NOTE (leseb): this is tricky unless this is set into the ceph.conf - # listened ports can be predicted, thus they will change after each restart -# - name: Wait for the OSDs to be up again -# local_action: > -# wait_for -# host={{ ansible_ssh_host | default(inventory_hostname) }} -# port={{ item }} -# timeout=30 -# with_items: -# - "{{ osd_ports.stdout_lines }}" - - - name: Waiting for clean PGs... 
- shell: > - test "$(ceph pg stat | sed 's/^.*pgs://' | sed 's/active+clean.*//' |sed 's/ //')" -eq "$(ceph pg stat | sed 's/pgs.*//' | sed 's/^.*://' | sed 's/ //')" && ceph health | egrep -q "HEALTH_OK|HEALTH_WARN" - register: result - until: result.rc == 0 - retries: 10 - delay: 10 - delegate_to: "{{ item }}" - with_items: groups.backup[0] - when: migration_completed.stat.exists == False - - - name: Done moving to the next OSD - file: > - path=/var/lib/ceph/migration_completed - state=touch - owner=root - group=root - mode=0600 - when: migration_completed.stat.exists == False - - - name: Unset the noout flag - command: ceph osd unset noout - delegate_to: "{{ item }}" - with_items: groups.mons[0] - when: migration_completed.stat.exists == False - -- hosts: rgws - serial: 1 - sudo: True - - vars: - backup_dir: /tmp/ - - tasks: - - name: Check if the node has be migrated already - stat: > - path=/var/lib/ceph/radosgw/migration_completed - register: migration_completed - failed_when: false - - - name: Check for failed run - stat: > - path=/var/lib/ceph/{{ ansible_hostname }}.tar - register: rgw_archive_leftover - - - fail: msg="Looks like an archive is already there, please remove it!" - when: migration_completed.stat.exists == False and rgw_archive_leftover.stat.exists == True - - - name: Archive rados gateway configs - shell: > - tar -cpvzf - --one-file-system . 
/etc/ceph/* /etc/apache2/* | cat > {{ ansible_hostname }}.tar - chdir=/var/lib/ceph/ - creates={{ ansible_hostname }}.tar - when: migration_completed.stat.exists == False - - - name: Create backup directory - file: > - path={{ backup_dir }}/rgws-backups - state=directory - owner=root - group=root - mode=0644 - delegate_to: "{{ item }}" - with_items: groups.backup[0] - when: migration_completed.stat.exists == False - - - name: Scp RGWs dirs and configs - fetch: > - src=/var/lib/ceph/{{ ansible_hostname }}.tar - dest={{ backup_dir }}/rgws-backups/ - flat=yes - when: migration_completed.stat.exists == False - - - name: Gracefully stop the rados gateway and apache - service: > - name={{ item }} - state=stopped - with_items: - - apache2 - - radosgw - when: migration_completed.stat.exists == False - - - name: Wait for radosgw to be down - local_action: > - wait_for - host={{ ansible_ssh_host | default(inventory_hostname) }} - path=/tmp/radosgw.sock - state=absent - timeout=30 - when: migration_completed.stat.exists == False - - - name: Reboot the server - command: reboot - when: migration_completed.stat.exists == False - - - name: Wait for the server to come up - local_action: > - wait_for - port=22 - delay=10 - timeout=3600 - when: migration_completed.stat.exists == False - - - name: Wait a bit to be sure that the server is ready for scp - pause: seconds=20 - when: migration_completed.stat.exists == False - - # NOTE (leseb): 'creates' was added in Ansible 1.6 - - name: Copy and unarchive the OSD configs - unarchive: > - src={{ backup_dir }}/rgws-backups/{{ ansible_hostname }}.tar - dest=/var/lib/ceph/ - copy=yes - mode=0600 - creates=etc/ceph/ceph.conf - when: migration_completed.stat.exists == False - - - name: Copy keys and configs - shell: {{ item }} chdir=/var/lib/ceph/ - with_items: - - cp etc/ceph/* /etc/ceph/ - - cp -r etc/apache2/* /etc/httpd/ - when: migration_completed.stat.exists == False - - - name: Start rados gateway and httpd - service: > - name={{ item 
}} - state=started - with_items: - - httpd - - radosgw - when: migration_completed.stat.exists == False - - - name: Wait for radosgw to be up again - local_action: > - wait_for - host={{ ansible_ssh_host | default(inventory_hostname) }} - path=/tmp/radosgw.sock - state=present - timeout=30 - when: migration_completed.stat.exists == False - - - name: Done moving to the next rados gateway - file: > - path=/var/lib/ceph/radosgw/migration_completed - state=touch - owner=root - group=root - mode=0600 - when: migration_completed.stat.exists == False diff --git a/infrastructure-playbooks/README.md b/infrastructure-playbooks/README.md new file mode 100644 index 000000000..ba32864c0 --- /dev/null +++ b/infrastructure-playbooks/README.md @@ -0,0 +1,5 @@ +Infrastructure playbooks +======================== + +This directory contains a variety of playbooks that can be used independently of the Ceph roles we have. +They aim to perform infrastructure related tasks that would help us manage a Ceph cluster or perform certain operational tasks. diff --git a/infrastructure-playbooks/cluster-maintenance.yml b/infrastructure-playbooks/cluster-maintenance.yml new file mode 100644 index 000000000..c559ed62f --- /dev/null +++ b/infrastructure-playbooks/cluster-maintenance.yml @@ -0,0 +1,37 @@ +--- +# This playbook was made to automate Ceph servers maintenance +# Typical use case: hardware change +# By running this playbook you will set the 'noout' flag on your +# cluster, which means that OSD **can't** be marked as out +# of the CRUSH map, but they will be marked as down. +# Basically we tell the cluster to don't move any data since +# the operation won't last for too long.
+ +- hosts: + gather_facts: False + + tasks: + + - name: Set the noout flag + command: ceph osd set noout + delegate_to: + + - name: Turn off the server + command: poweroff + + - name: Wait for the server to go down + local_action: > + wait_for host= + port=22 + state=stopped + + - name: Wait for the server to come up + local_action: > + wait_for host= + port=22 + delay=10 + timeout=3600 + + - name: Unset the noout flag + command: ceph osd unset noout + delegate_to: diff --git a/infrastructure-playbooks/cluster-os-migration.yml b/infrastructure-playbooks/cluster-os-migration.yml new file mode 100644 index 000000000..9053dfd91 --- /dev/null +++ b/infrastructure-playbooks/cluster-os-migration.yml @@ -0,0 +1,557 @@ +--- +# This playbook was meant to upgrade a node from Ubuntu to RHEL. +# We are performing a set of actions prior to reboot the node. +# The node reboots via PXE and gets its new operating system. +# This playbook only works for monitors and OSDs. +# Note that some of the checks are ugly: +# ie: the when migration_completed.stat.exists +# can be improved with includes, however I wanted to keep a single file... +# + +- hosts: mons + serial: 1 + sudo: True + + vars: + backup_dir: /tmp/ + + tasks: + + - name: Check if the node has be migrated already + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed + register: migration_completed + failed_when: false + + - name: Check for failed run + stat: > + path=/var/lib/ceph/{{ ansible_hostname }}.tar + register: mon_archive_leftover + + - fail: msg="Looks like an archive is already there, please remove it!" 
+ when: migration_completed.stat.exists == False and mon_archive_leftover.stat.exists == True + + - name: Compress the store as much as possible + command: ceph tell mon.{{ ansible_hostname }} compact + when: migration_completed.stat.exists == False + + - name: Check if sysvinit + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit + register: monsysvinit + changed_when: False + + - name: Check if upstart + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart + register: monupstart + changed_when: False + + - name: Check if init does what it is supposed to do (Sysvinit) + shell: > + ps faux|grep -sq [c]eph-mon && service ceph status mon >> /dev/null + register: ceph_status_sysvinit + changed_when: False + + # can't complete the condition since the previous taks never ran... + - fail: msg="Something is terribly wrong here, sysvinit is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!" + when: ceph_status_sysvinit.rc != 0 and migration_completed.stat.exists == False and monsysvinit.stat.exists == True + + - name: Check if init does what it is supposed to do (upstart) + shell: > + ps faux|grep -sq [c]eph-mon && status ceph-mon-all >> /dev/null + register: ceph_status_upstart + changed_when: False + + - fail: msg="Something is terribly wrong here, upstart is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!" 
+ when: ceph_status_upstart.rc != 0 and migration_completed.stat.exists == False and monupstart.stat.exists == True + + - name: Restart the Monitor after compaction (Upstart) + service: > + name=ceph-mon + state=restarted + args=id={{ ansible_hostname }} + when: monupstart.stat.exists == True and migration_completed.stat.exists == False + + - name: Restart the Monitor after compaction (Sysvinit) + service: > + name=ceph + state=restarted + args=mon + when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False + + - name: Wait for the monitor to be up again + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} + port=6789 + timeout=10 + when: migration_completed.stat.exists == False + + - name: Stop the monitor (Upstart) + service: > + name=ceph-mon + state=stopped + args=id={{ ansible_hostname }} + when: monupstart.stat.exists == True and migration_completed.stat.exists == False + + - name: Stop the monitor (Sysvinit) + service: > + name=ceph + state=stopped + args=mon + when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False + + - name: Wait for the monitor to be down + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} + port=6789 + timeout=10 + state=stopped + when: migration_completed.stat.exists == False + + - name: Create a backup directory + file: > + path={{ backup_dir }}/monitors-backups + state=directory + owner=root + group=root + mode=0644 + delegate_to: "{{ item }}" + with_items: groups.backup[0] + when: migration_completed.stat.exists == False + + # NOTE (leseb): should we convert upstart to sysvinit here already? + - name: Archive monitor stores + shell: > + tar -cpvzf - --one-file-system . 
/etc/ceph/* | cat > {{ ansible_hostname }}.tar + chdir=/var/lib/ceph/ + creates={{ ansible_hostname }}.tar + when: migration_completed.stat.exists == False + + - name: Scp the Monitor store + fetch: > + src=/var/lib/ceph/{{ ansible_hostname }}.tar + dest={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar + flat=yes + when: migration_completed.stat.exists == False + + - name: Reboot the server + command: reboot + when: migration_completed.stat.exists == False + + - name: Wait for the server to come up + local_action: > + wait_for + port=22 + delay=10 + timeout=3600 + when: migration_completed.stat.exists == False + + - name: Wait a bit more to be sure that the server is ready + pause: seconds=20 + when: migration_completed.stat.exists == False + + - name: Check if sysvinit + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit + register: monsysvinit + changed_when: False + + - name: Check if upstart + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart + register: monupstart + changed_when: False + + - name: Make sure the monitor is stopped (Upstart) + service: > + name=ceph-mon + state=stopped + args=id={{ ansible_hostname }} + when: monupstart.stat.exists == True and migration_completed.stat.exists == False + + - name: Make sure the monitor is stopped (Sysvinit) + service: > + name=ceph + state=stopped + args=mon + when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False + + # NOTE (leseb): 'creates' was added in Ansible 1.6 + - name: Copy and unarchive the monitor store + unarchive: > + src={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar + dest=/var/lib/ceph/ + copy=yes + mode=0600 + creates=etc/ceph/ceph.conf + when: migration_completed.stat.exists == False + + - name: Copy keys and configs + shell: > + cp etc/ceph/* /etc/ceph/ + chdir=/var/lib/ceph/ + when: migration_completed.stat.exists == False + + - name: Configure RHEL7 for sysvinit + shell: find -L /var/lib/ceph/mon/ 
-mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \; + when: migration_completed.stat.exists == False + + # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary + # so we directly call sysvinit + - name: Start the monitor + service: > + name=ceph + state=started + args=mon + when: migration_completed.stat.exists == False + + - name: Wait for the Monitor to be up again + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} + port=6789 + timeout=10 + when: migration_completed.stat.exists == False + + - name: Waiting for the monitor to join the quorum... + shell: > + ceph -s | grep monmap | sed 's/.*quorum//' | egrep -q {{ ansible_hostname }} + register: result + until: result.rc == 0 + retries: 5 + delay: 10 + delegate_to: "{{ item }}" + with_items: groups.backup[0] + when: migration_completed.stat.exists == False + + - name: Done moving to the next monitor + file: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed + state=touch + owner=root + group=root + mode=0600 + when: migration_completed.stat.exists == False + +- hosts: osds + serial: 1 + sudo: True + + vars: + backup_dir: /tmp/ + + tasks: + - name: Check if the node has be migrated already + stat: > + path=/var/lib/ceph/migration_completed + register: migration_completed + failed_when: false + + - name: Check for failed run + stat: > + path=/var/lib/ceph/{{ ansible_hostname }}.tar + register: osd_archive_leftover + + - fail: msg="Looks like an archive is already there, please remove it!" 
+ when: migration_completed.stat.exists == False and osd_archive_leftover.stat.exists == True + + - name: Check if init does what it is supposed to do (Sysvinit) + shell: > + ps faux|grep -sq [c]eph-osd && service ceph status osd >> /dev/null + register: ceph_status_sysvinit + changed_when: False + + # can't complete the condition since the previous taks never ran... + - fail: msg="Something is terribly wrong here, sysvinit is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!" + when: ceph_status_sysvinit.rc != 0 and migration_completed.stat.exists == False and monsysvinit.stat.exists == True + + - name: Check if init does what it is supposed to do (upstart) + shell: > + ps faux|grep -sq [c]eph-osd && initctl list|egrep -sq "ceph-osd \(ceph/.\) start/running, process [0-9][0-9][0-9][0-9]" + register: ceph_status_upstart + changed_when: False + + - fail: msg="Something is terribly wrong here, upstart is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!" + when: ceph_status_upstart.rc != 0 and migration_completed.stat.exists == False and monupstart.stat.exists == True + + - name: Set the noout flag + command: ceph osd set noout + delegate_to: "{{ item }}" + with_items: groups.mons[0] + when: migration_completed.stat.exists == False + + - name: Check if sysvinit + shell: stat /var/lib/ceph/osd/ceph-*/sysvinit + register: osdsysvinit + failed_when: false + changed_when: False + + - name: Check if upstart + shell: stat /var/lib/ceph/osd/ceph-*/upstart + register: osdupstart + failed_when: false + changed_when: False + + - name: Archive ceph configs + shell: > + tar -cpvzf - --one-file-system . 
/etc/ceph/ceph.conf | cat > {{ ansible_hostname }}.tar + chdir=/var/lib/ceph/ + creates={{ ansible_hostname }}.tar + when: migration_completed.stat.exists == False + + - name: Create backup directory + file: > + path={{ backup_dir }}/osds-backups + state=directory + owner=root + group=root + mode=0644 + delegate_to: "{{ item }}" + with_items: groups.backup[0] + when: migration_completed.stat.exists == False + + - name: Scp OSDs dirs and configs + fetch: > + src=/var/lib/ceph/{{ ansible_hostname }}.tar + dest={{ backup_dir }}/osds-backups/ + flat=yes + when: migration_completed.stat.exists == False + + - name: Collect OSD ports + shell: netstat -tlpn | awk -F ":" '/ceph-osd/ { sub (" .*", "", $2); print $2 }' | uniq + register: osd_ports + when: migration_completed.stat.exists == False + + - name: Gracefully stop the OSDs (Upstart) + service: > + name=ceph-osd-all + state=stopped + when: osdupstart.rc == 0 and migration_completed.stat.exists == False + + - name: Gracefully stop the OSDs (Sysvinit) + service: > + name=ceph + state=stopped + args=mon + when: osdsysvinit.rc == 0 and migration_completed.stat.exists == False + + - name: Wait for the OSDs to be down + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} + port={{ item }} + timeout=10 + state=stopped + with_items: + - "{{ osd_ports.stdout_lines }}" + when: migration_completed.stat.exists == False + + - name: Configure RHEL with sysvinit + shell: find -L /var/lib/ceph/osd/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \; + when: migration_completed.stat.exists == False + + - name: Reboot the server + command: reboot + when: migration_completed.stat.exists == False + + - name: Wait for the server to come up + local_action: > + wait_for + port=22 + delay=10 + timeout=3600 + when: migration_completed.stat.exists == False + + - name: Wait a bit to be sure that the server is ready for scp + 
pause: seconds=20 + when: migration_completed.stat.exists == False + + # NOTE (leseb): 'creates' was added in Ansible 1.6 + - name: Copy and unarchive the OSD configs + unarchive: > + src={{ backup_dir }}/osds-backups/{{ ansible_hostname }}.tar + dest=/var/lib/ceph/ + copy=yes + mode=0600 + creates=etc/ceph/ceph.conf + when: migration_completed.stat.exists == False + + - name: Copy keys and configs + shell: > + cp etc/ceph/* /etc/ceph/ + chdir=/var/lib/ceph/ + when: migration_completed.stat.exists == False + + # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary + # so we directly call sysvinit + - name: Start all the OSDs + service: > + name=ceph-osd-all + state=started + args=osd + when: migration_completed.stat.exists == False + + # NOTE (leseb): this is tricky unless this is set into the ceph.conf + # listened ports can be predicted, thus they will change after each restart +# - name: Wait for the OSDs to be up again +# local_action: > +# wait_for +# host={{ ansible_ssh_host | default(inventory_hostname) }} +# port={{ item }} +# timeout=30 +# with_items: +# - "{{ osd_ports.stdout_lines }}" + + - name: Waiting for clean PGs... 
+ shell: > + test "$(ceph pg stat | sed 's/^.*pgs://' | sed 's/active+clean.*//' |sed 's/ //')" -eq "$(ceph pg stat | sed 's/pgs.*//' | sed 's/^.*://' | sed 's/ //')" && ceph health | egrep -q "HEALTH_OK|HEALTH_WARN" + register: result + until: result.rc == 0 + retries: 10 + delay: 10 + delegate_to: "{{ item }}" + with_items: groups.backup[0] + when: migration_completed.stat.exists == False + + - name: Done moving to the next OSD + file: > + path=/var/lib/ceph/migration_completed + state=touch + owner=root + group=root + mode=0600 + when: migration_completed.stat.exists == False + + - name: Unset the noout flag + command: ceph osd unset noout + delegate_to: "{{ item }}" + with_items: groups.mons[0] + when: migration_completed.stat.exists == False + +- hosts: rgws + serial: 1 + sudo: True + + vars: + backup_dir: /tmp/ + + tasks: + - name: Check if the node has be migrated already + stat: > + path=/var/lib/ceph/radosgw/migration_completed + register: migration_completed + failed_when: false + + - name: Check for failed run + stat: > + path=/var/lib/ceph/{{ ansible_hostname }}.tar + register: rgw_archive_leftover + + - fail: msg="Looks like an archive is already there, please remove it!" + when: migration_completed.stat.exists == False and rgw_archive_leftover.stat.exists == True + + - name: Archive rados gateway configs + shell: > + tar -cpvzf - --one-file-system . 
/etc/ceph/* /etc/apache2/* | cat > {{ ansible_hostname }}.tar + chdir=/var/lib/ceph/ + creates={{ ansible_hostname }}.tar + when: migration_completed.stat.exists == False + + - name: Create backup directory + file: > + path={{ backup_dir }}/rgws-backups + state=directory + owner=root + group=root + mode=0644 + delegate_to: "{{ item }}" + with_items: groups.backup[0] + when: migration_completed.stat.exists == False + + - name: Scp RGWs dirs and configs + fetch: > + src=/var/lib/ceph/{{ ansible_hostname }}.tar + dest={{ backup_dir }}/rgws-backups/ + flat=yes + when: migration_completed.stat.exists == False + + - name: Gracefully stop the rados gateway and apache + service: > + name={{ item }} + state=stopped + with_items: + - apache2 + - radosgw + when: migration_completed.stat.exists == False + + - name: Wait for radosgw to be down + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} + path=/tmp/radosgw.sock + state=absent + timeout=30 + when: migration_completed.stat.exists == False + + - name: Reboot the server + command: reboot + when: migration_completed.stat.exists == False + + - name: Wait for the server to come up + local_action: > + wait_for + port=22 + delay=10 + timeout=3600 + when: migration_completed.stat.exists == False + + - name: Wait a bit to be sure that the server is ready for scp + pause: seconds=20 + when: migration_completed.stat.exists == False + + # NOTE (leseb): 'creates' was added in Ansible 1.6 + - name: Copy and unarchive the OSD configs + unarchive: > + src={{ backup_dir }}/rgws-backups/{{ ansible_hostname }}.tar + dest=/var/lib/ceph/ + copy=yes + mode=0600 + creates=etc/ceph/ceph.conf + when: migration_completed.stat.exists == False + + - name: Copy keys and configs + shell: "{{ item }} chdir=/var/lib/ceph/" + with_items: + - cp etc/ceph/* /etc/ceph/ + - cp -r etc/apache2/* /etc/httpd/ + when: migration_completed.stat.exists == False + + - name: Start rados gateway and httpd + service: > + name={{ item
}} + state=started + with_items: + - httpd + - radosgw + when: migration_completed.stat.exists == False + + - name: Wait for radosgw to be up again + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} + path=/tmp/radosgw.sock + state=present + timeout=30 + when: migration_completed.stat.exists == False + + - name: Done moving to the next rados gateway + file: > + path=/var/lib/ceph/radosgw/migration_completed + state=touch + owner=root + group=root + mode=0600 + when: migration_completed.stat.exists == False diff --git a/infrastructure-playbooks/localrepo-site.yml.sample b/infrastructure-playbooks/localrepo-site.yml.sample new file mode 100644 index 000000000..d7fdf8320 --- /dev/null +++ b/infrastructure-playbooks/localrepo-site.yml.sample @@ -0,0 +1,56 @@ +--- +# Sample Playbook for local mirrors. +# Additional/optional step to generate repos.d file for local mirrors. +# Defines deployment design and assigns role to server groups + +- hosts: all + max_fail_percentage: 0 + become: True + vars: + repolist: + - { src: "//localmirror_ceph.repo", dest: "/etc/yum.repos.d/ceph.repo" } + tasks: + - name: Copy User provided repo files to /etc/yum.repos.d/ + copy: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + owner: root + group: root + with_items: + - "{{ repolist }}" + +- hosts: mons + become: True + roles: + - ceph-mon + +- hosts: agents + become: True + roles: + - ceph-agent + +- hosts: osds + become: True + roles: + - ceph-osd + +- hosts: mdss + become: True + roles: + - ceph-mds + +- hosts: rgws + become: True + roles: + - ceph-rgw + +- hosts: nfss + become: True + roles: + - ceph-nfs + +- hosts: restapis + become: True + roles: + - ceph-restapi + diff --git a/infrastructure-playbooks/osd-configure.yml b/infrastructure-playbooks/osd-configure.yml new file mode 100644 index 000000000..a193dca10 --- /dev/null +++ b/infrastructure-playbooks/osd-configure.yml @@ -0,0 +1,18 @@ +--- +# This playbook is used to add a new OSD to +# an 
existing cluster without the need for running +# the ceph-common or ceph-mon role again against all +# of the existing monitors. +# +# Ensure that all monitors are present in the mons +# group in your inventory so that the ceph.conf is +# created correctly for the new OSD. +- hosts: mons + become: True + roles: + - ceph-fetch-keys + +- hosts: osds + become: True + roles: + - ceph-osd diff --git a/infrastructure-playbooks/purge-cluster.yml b/infrastructure-playbooks/purge-cluster.yml new file mode 100644 index 000000000..d5e001013 --- /dev/null +++ b/infrastructure-playbooks/purge-cluster.yml @@ -0,0 +1,453 @@ +--- +# This playbook purges Ceph +# It removes: packages, configuration files and ALL THE DATA +# +# Use it like this: +# ansible-playbook purge-cluster.yml +# Prompts for confirmation to purge, defaults to no and +# doesn't purge the cluster. yes purges the cluster. +# +# ansible-playbook -e ireallymeanit=yes|no purge-cluster.yml +# Overrides the prompt using -e option. Can be used in +# automation scripts to avoid interactive prompt. + +- name: confirm whether user really meant to purge the cluster + hosts: localhost + + vars_prompt: + - name: ireallymeanit + prompt: Are you sure you want to purge the cluster? + default: 'no' + private: no + + tasks: + - name: exit playbook, if user did not mean to purge cluster + fail: + msg: > + "Exiting purge-cluster playbook, cluster was NOT purged. + To purge the cluster, either say 'yes' on the prompt or + or use `-e ireallymeanit=yes` on the command line when + invoking the playbook" + when: ireallymeanit != 'yes' + +- name: stop ceph cluster + hosts: + - mons + - osds + - mdss + - rgws + - nfss + + become: yes + + vars: + osd_group_name: osds + mon_group_name: mons + rgw_group_name: rgws + mds_group_name: mdss + nfs_group_name: nfss + rbdmirror_group_name: rbdmirrors + +# When set to true both groups of packages are purged. 
+# This can cause problems with qemu-kvm + purge_all_packages: true + +# When set to true and raw_multi_journal is used then block devices are also zapped + zap_block_devs: true + + ceph_packages: + - ceph + - ceph-common + - ceph-fs-common + - ceph-fuse + - ceph-mds + - ceph-release + - ceph-radosgw + + ceph_remaining_packages: + - libcephfs1 + - librados2 + - libradosstriper1 + - librbd1 + - python-cephfs + - python-rados + - python-rbd + + cluster: ceph # name of the cluster + monitor_name: "{{ ansible_hostname }}" + mds_name: "{{ ansible_hostname }}" + + + handlers: + - name: restart machine + shell: sleep 2 && shutdown -r now "Ansible updates triggered" + async: 1 + poll: 0 + ignore_errors: true + + - name: wait for server to boot + local_action: wait_for port=22 host={{ inventory_hostname }} state=started delay=10 timeout=400 + + - name: remove data + file: + path: /var/lib/ceph + state: absent + + tasks: + - name: check for a device list + fail: + msg: "OSD automatic discovery was detected, purge cluster does not support this scenario. If you want to purge the cluster, manually provide the list of devices in group_vars/osds using the devices variable."
+ when: + osd_group_name in group_names and + devices is not defined and + osd_auto_discovery + + - name: get osd numbers + shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | cut -d '-' -f 2 ; fi" + register: osd_ids + changed_when: false + + - name: are we using systemd + shell: "if [ -d /usr/lib/systemd ] ; then find /usr/lib/systemd/system -name 'ceph*' | wc -l ; else echo 0 ; fi" + register: systemd_unit_files + +# after Hammer release + + - name: stop ceph.target with systemd + service: + name: ceph.target + state: stopped + enabled: no + when: + ansible_os_family == 'RedHat' and + systemd_unit_files.stdout != "0" + + - name: stop ceph-osd with systemd + service: + name: ceph-osd@{{item}} + state: stopped + enabled: no + with_items: "{{ osd_ids.stdout_lines }}" + when: + ansible_os_family == 'RedHat' and + systemd_unit_files.stdout != "0" and + osd_group_name in group_names + + - name: stop ceph mons with systemd + service: + name: ceph-mon@{{ ansible_hostname }} + state: stopped + enabled: no + when: + ansible_os_family == 'RedHat' and + systemd_unit_files.stdout != "0" and + mon_group_name in group_names + + - name: stop ceph mdss with systemd + service: + name: ceph-mds@{{ ansible_hostname }} + state: stopped + when: + ansible_os_family == 'RedHat' and + systemd_unit_files.stdout != "0" and + mds_group_name in group_names + + - name: stop ceph rgws with systemd + service: + name: ceph-radosgw@rgw.{{ ansible_hostname }} + state: stopped + when: + ansible_os_family == 'RedHat' and + systemd_unit_files.stdout != "0" and + rgw_group_name in group_names + + - name: stop ceph nfss with systemd + service: + name: nfs-ganesha + state: stopped + when: + ansible_os_family == 'RedHat' and + systemd_unit_files.stdout != "0" and + nfs_group_name in group_names + + - name: stop ceph rbd mirror with systemd + service: + name: ceph-rbd-mirror@admin.service + state: stopped + when: + ansible_os_family == 'RedHat' and + systemd_unit_files.stdout != "0" and + 
rbdmirror_group_name in group_names + +# before infernalis release, using sysvinit scripts +# we use this test so we do not have to know which RPM contains the boot script +# or where it is placed. + + - name: stop ceph osds + shell: "service ceph status osd ; if [ $? == 0 ] ; then service ceph stop osd ; else echo ; fi" + when: + ansible_os_family == 'RedHat' and + osd_group_name in group_names + + - name: stop ceph mons + shell: "service ceph status mon ; if [ $? == 0 ] ; then service ceph stop mon ; else echo ; fi" + when: + ansible_os_family == 'RedHat' and + mon_group_name in group_names + + - name: stop ceph mdss + shell: "service ceph status mds ; if [ $? == 0 ] ; then service ceph stop mds ; else echo ; fi" + when: + ansible_os_family == 'RedHat' and + mds_group_name in group_names + + - name: stop ceph rgws + shell: "service ceph-radosgw status ; if [ $? == 0 ] ; then service ceph-radosgw stop ; else echo ; fi" + when: + ansible_os_family == 'RedHat' and + rgw_group_name in group_names + + - name: stop ceph nfss + shell: "service nfs-ganesha status ; if [ $? 
== 0 ] ; then service nfs-ganesha stop ; else echo ; fi" + when: + ansible_os_family == 'RedHat' and + nfs_group_name in group_names + +# Ubuntu 14.04 + - name: stop ceph osds on ubuntu + shell: | + for id in $(ls /var/lib/ceph/osd/ |grep -oh '[0-9]*'); do + initctl stop ceph-osd cluster={{ cluster }} id=$id + done + failed_when: false + when: + ansible_distribution == 'Ubuntu' and + osd_group_name in group_names + with_items: "{{ osd_ids.stdout_lines }}" + + - name: stop ceph mons on ubuntu + command: initctl stop ceph-mon cluster={{ cluster }} id={{ monitor_name }} + failed_when: false + when: + ansible_distribution == 'Ubuntu' and + mon_group_name in group_names + + - name: stop ceph mdss on ubuntu + command: initctl stop ceph-mds cluster={{ cluster }} id={{ mds_name }} + failed_when: false + when: + ansible_distribution == 'Ubuntu' and + mds_group_name in group_names + + - name: stop ceph rgws on ubuntu + command: initctl stop radosgw cluster={{ cluster }} id={{ ansible_hostname }} + failed_when: false + when: + ansible_distribution == 'Ubuntu' and + rgw_group_name in group_names + + - name: stop ceph nfss on ubuntu + command: initctl stop nfs-ganesha + failed_when: false + when: + ansible_distribution == 'Ubuntu' and + nfs_group_name in group_names + + - name: stop ceph rbd mirror on ubuntu + command: initctl stop ceph-rbd-mirror cluster={{ cluster }} id=admin + failed_when: false + when: + ansible_distribution == 'Ubuntu' and + rbdmirror_group_name in group_names + + - name: check for anything running ceph + shell: "ps awux | grep -- [c]eph-" + register: check_for_running_ceph + failed_when: check_for_running_ceph.rc == 0 + + - name: see if ceph-disk-created data partitions are present + shell: "ls /dev/disk/by-partlabel | grep -q 'ceph\\\\x20data'" + failed_when: false + register: ceph_data_partlabels + + - name: see if ceph-disk-created journal partitions are present + shell: "ls /dev/disk/by-partlabel | grep -q 'ceph\\\\x20journal'" + failed_when: false +
register: ceph_journal_partlabels + + - name: get osd data mount points + shell: "(grep /var/lib/ceph/osd /proc/mounts || echo -n) | awk '{ print $2 }'" + register: mounted_osd + changed_when: false + + - name: drop all cache + shell: "sync && sleep 1 && echo 3 > /proc/sys/vm/drop_caches" + when: + osd_group_name in group_names + + - name: umount osd data partition + shell: umount {{ item }} + with_items: + - "{{ mounted_osd.stdout_lines }}" + when: + osd_group_name in group_names + + - name: remove osd mountpoint tree + file: + path: /var/lib/ceph/osd/ + state: absent + register: remove_osd_mountpoints + ignore_errors: true + when: + osd_group_name in group_names + + - name: remove monitor store and bootstrap keys + file: + path: /var/lib/ceph/ + state: absent + when: + mon_group_name in group_names + + - name: is reboot needed + local_action: shell echo requesting reboot + notify: + - restart machine + - wait for server to boot + - remove data + when: + osd_group_name in group_names and + remove_osd_mountpoints.failed is defined + + - name: see if ceph-disk is installed + shell: "which ceph-disk" + failed_when: false + register: ceph_disk_present + + - name: zap osd disks + shell: ceph-disk zap "{{ item }}" + with_items: "{{ devices | default([]) }}" + when: + osd_group_name in group_names and + ceph_disk_present.rc == 0 and + ceph_data_partlabels.rc == 0 and + zap_block_devs + + - name: zap journal devices + shell: ceph-disk zap "{{ item }}" + with_items: "{{ raw_journal_devices|default([])|unique }}" + when: + osd_group_name in group_names and + ceph_disk_present.rc == 0 and + ceph_journal_partlabels.rc == 0 and + zap_block_devs and + raw_multi_journal + + - name: purge ceph packages with yum + yum: + name: "{{ item }}" + state: absent + with_items: + - "{{ ceph_packages }}" + when: + ansible_pkg_mgr == 'yum' + + - name: purge ceph packages with dnf + dnf: + name: "{{ item }}" + state: absent + with_items: + - "{{ ceph_packages }}" + when: + ansible_pkg_mgr == 
'dnf' + + - name: purge ceph packages with apt + apt: + name: "{{ item }}" + state: absent + with_items: + - "{{ ceph_packages }}" + when: + ansible_pkg_mgr == 'apt' + + - name: purge remaining ceph packages with yum + yum: + name: "{{ item }}" + state: absent + with_items: + - "{{ ceph_remaining_packages }}" + when: + ansible_pkg_mgr == 'yum' and + purge_all_packages == true + + - name: purge remaining ceph packages with dnf + dnf: + name: "{{ item }}" + state: absent + with_items: + - "{{ ceph_remaining_packages }}" + when: + ansible_pkg_mgr == 'dnf' and + purge_all_packages == true + + - name: purge remaining ceph packages with apt + apt: + name: "{{ item }}" + state: absent + with_items: + - "{{ ceph_remaining_packages }}" + when: + ansible_pkg_mgr == 'apt' and + purge_all_packages == true + + - name: remove config + file: + path: /etc/ceph + state: absent + + - name: remove logs + file: + path: /var/log/ceph + state: absent + + - name: remove from SysV + shell: "update-rc.d -f ceph remove" + when: + ansible_distribution == 'Ubuntu' + + - name: remove Upstart nad SysV files + shell: "find /etc -name '*ceph*' -delete" + when: + ansible_distribution == 'Ubuntu' + + - name: remove Upstart and apt logs and cache + shell: "find /var -name '*ceph*' -delete" + when: + ansible_distribution == 'Ubuntu' + + - name: request data removal + local_action: shell echo requesting data removal + become: false + notify: + - remove data + + - name: purge dnf cache + command: dnf clean all + when: + ansible_pkg_mgr == 'dnf' + + - name: purge RPM cache in /tmp + file: + path: /tmp/rh-storage-repo + state: absent + + - name: clean apt + shell: apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + when: + ansible_pkg_mgr == 'apt' + + - name: purge rh_storage.repo file in /etc/yum.repos.d + file: + path: /etc/yum.repos.d/rh_storage.repo + state: absent + when: + ansible_os_family == 'RedHat' diff --git a/infrastructure-playbooks/purge-docker-cluster.yml 
b/infrastructure-playbooks/purge-docker-cluster.yml new file mode 100644 index 000000000..3f33cbdac --- /dev/null +++ b/infrastructure-playbooks/purge-docker-cluster.yml @@ -0,0 +1,540 @@ +--- +# This playbook purges a containerized Ceph cluster +# It removes: packages, containers, configuration files and ALL THE DATA + +- name: confirm whether user really meant to purge the cluster + + hosts: + - localhost + + gather_facts: false + + vars_prompt: + - name: ireallymeanit + prompt: Are you sure you want to purge the cluster? + default: 'no' + private: no + + - name: remove_packages + prompt: > + If --skip-tags=with_pkg is not set docker packages + and more will be uninstalled from non-atomic hosts. + Do you want to continue? + default: 'no' + private: no + + tasks: + - name: exit playbook, if user did not mean to purge cluster + fail: + msg: > + "Exiting purge-docker-cluster playbook, cluster was NOT purged. + To purge the cluster, either say 'yes' on the prompt or + or use `-e ireallymeanit=yes` on the command line when + invoking the playbook" + when: ireallymeanit != 'yes' + + - name: exit playbook, if user did not mean to remove packages + fail: + msg: > + "Exiting purge-docker-cluster playbook. No packages were removed. + To skip removing packages use --skip-tag=with_pkg. 
To continue + with removing packages, do not specify --skip-tag=with_pkg and + either say 'yes' on the prompt or use `-e remove_packages=yes` + on the command line when invoking the playbook" + when: remove_packages != 'yes' + + +- name: purge ceph mds cluster + + vars: + mds_group_name: mdss + + hosts: + - "{{ mds_group_name }}" + + become: true + + tasks: + - include_vars: roles/ceph-common/defaults/main.yml + - include_vars: roles/ceph-mds/defaults/main.yml + - include_vars: group_vars/all + + - name: disable ceph mds service + service: + name: "ceph-mds@{{ ansible_hostname }}" + state: stopped + enabled: no + ignore_errors: true + + - name: remove ceph mds container + docker: + image: "{{ ceph_mds_docker_username }}/{{ ceph_mds_docker_imagename }}:{{ ceph_mds_docker_image_tag }}" + name: "{{ ansible_hostname }}" + state: absent + ignore_errors: true + + - name: remove ceph mds service + file: + path: /etc/systemd/system/ceph-mds@.service + state: absent + + - name: remove ceph mds image + docker_image: + state: absent + name: "{{ ceph_mds_docker_username }}/{{ ceph_mds_docker_imagename }}" + tag: "{{ ceph_mds_docker_image_tag }}" + tags: + remove_img + + +- name: purge ceph rgw cluster + + vars: + rgw_group_name: rgws + + hosts: + - "{{ rgw_group_name }}" + + become: true + + tasks: + - include_vars: roles/ceph-common/defaults/main.yml + - include_vars: roles/ceph-rgw/defaults/main.yml + - include_vars: group_vars/all + + - name: disable ceph rgw service + service: + name: "ceph-rgw@{{ ansible_hostname }}" + state: stopped + enabled: no + ignore_errors: true + + - name: remove ceph rgw container + docker: + image: "{{ ceph_rgw_docker_username }}/{{ ceph_rgw_docker_imagename }}:{{ ceph_rgw_docker_image_tag }}" + name: "{{ ansible_hostname }}" + state: absent + ignore_errors: true + + - name: remove ceph rgw service + file: + path: /etc/systemd/system/ceph-rgw@.service + state: absent + + - name: remove ceph rgw image + docker_image: + state: absent + name: "{{ 
ceph_rgw_docker_username }}/{{ ceph_rgw_docker_imagename }}" + tag: "{{ ceph_rgw_docker_image_tag }}" + tags: + remove_img + + +- name: purge ceph nfs cluster + + vars: + nfs_group_name: nfss + + hosts: + - "{{ nfs_group_name }}" + + become: true + + tasks: + - include_vars: roles/ceph-common/defaults/main.yml + - include_vars: roles/ceph-nfs/defaults/main.yml + - include_vars: group_vars/all + + - name: disable ceph nfs service + service: + name: "ceph-nfs@{{ ansible_hostname }}" + state: stopped + enabled: no + ignore_errors: true + + - name: remove ceph nfs container + docker: + image: "{{ ceph_nfs_docker_username }}/{{ ceph_nfs_docker_imagename }}:{{ ceph_nfs_docker_image_tag }}" + name: "{{ ansible_hostname }}" + state: absent + ignore_errors: true + + - name: remove ceph nfs service + file: + path: /etc/systemd/system/ceph-nfs@.service + state: absent + + - name: remove ceph nfs directories for "{{ ansible_hostname }}" + file: + path: "{{ item }}" + state: absent + with_items: + - /etc/ganesha + - /var/lib/nfs/ganesha + - /var/run/ganesha + + - name: remove ceph nfs image + docker_image: + state: absent + name: "{{ ceph_nfs_docker_username }}/{{ ceph_nfs_docker_imagename }}" + tag: "{{ ceph_nfs_docker_image_tag }}" + tags: + remove_img + + +- name: purge ceph osd cluster + + vars: + osd_group_name: osds + + hosts: + - "{{ osd_group_name }}" + + become: true + + tasks: + - include_vars: roles/ceph-common/defaults/main.yml + - include_vars: roles/ceph-osd/defaults/main.yml + - include_vars: group_vars/all + + - name: disable ceph osd service + service: + name: "ceph-osd@{{ item | basename }}" + state: stopped + enabled: no + with_items: "{{ ceph_osd_docker_devices }}" + ignore_errors: true + + - name: remove ceph osd prepare container + docker: + image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" + name: "{{ ansible_hostname }}-osd-prepare-{{ item | regex_replace('/', '') }}" + state: absent + with_items: "{{ 
ceph_osd_docker_devices }}" + ignore_errors: true + + - name: remove ceph osd container + docker: + image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" + name: "{{ ansible_hostname }}-osd-{{ item | regex_replace('/', '') }}" + state: absent + with_items: "{{ ceph_osd_docker_devices }}" + ignore_errors: true + + - name: zap ceph osd disk + docker: + image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" + name: "{{ ansible_hostname }}-osd-zap-{{ item | regex_replace('/', '') }}" + net: host + pid: host + state: started + privileged: yes + env: "CEPH_DAEMON=zap_device,OSD_DEVICE={{ item }}" + volumes: "/var/lib/ceph:/var/lib/ceph,/etc/ceph:/etc/ceph,/dev:/dev,/run:/run" + with_items: "{{ ceph_osd_docker_devices }}" + + - name: remove ceph osd zap disk container + docker: + image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" + name: "{{ ansible_hostname }}-osd-zap-{{ item | regex_replace('/', '') }}" + state: absent + with_items: "{{ ceph_osd_docker_devices }}" + + # zap twice + - name: zap ceph osd disk + docker: + image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" + name: "{{ ansible_hostname }}-osd-zap-{{ item | regex_replace('/', '') }}" + net: host + pid: host + state: started + privileged: yes + env: "CEPH_DAEMON=zap_device,OSD_DEVICE={{ item }}" + volumes: "/var/lib/ceph:/var/lib/ceph,/etc/ceph:/etc/ceph,/dev:/dev,/run:/run" + with_items: "{{ ceph_osd_docker_devices }}" + + - name: remove ceph osd zap disk container + docker: + image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" + name: "{{ ansible_hostname }}-osd-zap-{{ item | regex_replace('/', '') }}" + state: absent + with_items: "{{ ceph_osd_docker_devices }}" + + - name: remove ceph osd service + file: + path: /etc/systemd/system/ceph-osd@.service + 
state: absent + + - name: remove ceph osd image + docker_image: + state: absent + name: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}" + tag: "{{ ceph_osd_docker_image_tag }}" + tags: + remove_img + + +- name: purge ceph mon cluster + + vars: + mon_group_name: mons + + hosts: + - "{{ mon_group_name }}" + + become: true + + tasks: + - include_vars: roles/ceph-common/defaults/main.yml + - include_vars: roles/ceph-mon/defaults/main.yml + - include_vars: roles/ceph-restapi/defaults/main.yml + - include_vars: group_vars/all + + - name: disable ceph mon service + service: + name: "ceph-mon@{{ ansible_hostname }}" + state: stopped + enabled: no + ignore_errors: true + + - name: remove ceph mon container + docker: + image: "{{ ceph_mon_docker_username }}/{{ ceph_mon_docker_imagename }}:{{ ceph_mon_docker_image_tag }}" + name: "{{ ansible_hostname }}" + state: absent + ignore_errors: true + + - name: remove restapi container + docker: + image: "{{ ceph_restapi_docker_username }}/{{ ceph_restapi_docker_imagename }}:{{ ceph_restapi_docker_image_tag }}" + name: "{{ ansible_hostname }}-ceph-restapi" + state: absent + + - name: remove ceph mon service + file: + path: /etc/systemd/system/ceph-mon@.service + state: absent + + - name: remove ceph mon image + docker_image: + state: absent + name: "{{ ceph_mon_docker_username }}/{{ ceph_mon_docker_imagename }}" + tag: "{{ ceph_mon_docker_image_tag }}" + tags: + remove_img + + +- name: remove installed packages + + vars: + mon_group_name: mons + osd_group_name: osds + mds_group_name: mdss + rgw_group_name: rgws + nfs_group_name: nfss + + hosts: + - "{{ mon_group_name }}" + - "{{ osd_group_name }}" + - "{{ mds_group_name }}" + - "{{ rgw_group_name }}" + - "{{ nfs_group_name }}" + + become: true + + tags: + with_pkg + + tasks: + - name: check if it is Atomic host + stat: path=/run/ostree-booted + register: stat_ostree + + - name: set fact for using Atomic host + set_fact: + is_atomic: "{{ stat_ostree.stat.exists }}" + 
+ - name: stop docker service + service: + name: docker + state: stopped + enabled: no + when: not is_atomic + + - name: remove docker-py + pip: + name: docker-py + version: 1.1.0 + state: absent + when: + ansible_version['full'] | version_compare('2.1.0.0', '<') and + not is_atomic + + - name: remove docker-py + pip: + name: docker-py + state: absent + when: + ansible_version['full'] | version_compare('2.1.0.0', '>=') and + not is_atomic + + - name: remove six + pip: + name: six + version: 1.9.0 + state: absent + when: not is_atomic + + - name: remove pip and docker on ubuntu + apt: + name: "{{ item }}" + state: absent + update_cache: yes + autoremove: yes + with_items: + - python-pip + - docker + - docker.io + when: ansible_distribution == 'Ubuntu' + + - name: remove pip and docker on debian + apt: + name: "{{ item }}" + state: absent + update_cache: yes + autoremove: yes + with_items: + - python-pip + - docker-engine + when: ansible_distribution == 'Debian' + + - name: remove epel-release on redhat + yum: + name: epel-release + state: absent + when: + ansible_os_family == 'RedHat' and + not is_atomic + + - name: remove pip on redhat + yum: + name: "{{ item }}" + state: absent + with_items: + - python-pip + when: + ansible_os_family == 'RedHat' and + ansible_pkg_mgr == "yum" and + not is_atomic + + - name: remove docker-engine on redhat + yum: + name: "{{ item }}" + state: absent + with_items: + - docker-engine + when: + ansible_os_family == 'RedHat' and + ansible_pkg_mgr == "yum" and + not is_atomic + + # for CentOS + - name: remove docker on redhat + yum: + name: "{{ item }}" + state: absent + with_items: + - docker + when: + ansible_os_family == 'RedHat' and + ansible_pkg_mgr == "yum" and + not is_atomic + + - name: remove pip and docker on redhat + dnf: + name: "{{ item }}" + state: absent + with_items: + - python-pip + - docker-engine + - docker + when: + ansible_os_family == 'RedHat' and + ansible_pkg_mgr == "dnf" and + not is_atomic + + - name: remove 
package dependencies on redhat + command: yum -y autoremove + when: + ansible_os_family == 'RedHat' and + ansible_pkg_mgr == "yum" and + not is_atomic + + - name: remove package dependencies on redhat again + command: yum -y autoremove + when: + ansible_os_family == 'RedHat' and + ansible_pkg_mgr == "yum" and + not is_atomic + + - name: remove package dependencies on redhat + command: dnf -y autoremove + when: + ansible_os_family == 'RedHat' and + ansible_pkg_mgr == "dnf" and + not is_atomic + + - name: remove package dependencies on redhat again + command: dnf -y autoremove + when: + ansible_os_family == 'RedHat' and + ansible_pkg_mgr == "dnf" and + not is_atomic + + +- name: purge ceph directories + + vars: + mon_group_name: mons + osd_group_name: osds + mds_group_name: mdss + rgw_group_name: rgws + nfs_group_name: nfss + + hosts: + - "{{ mon_group_name }}" + - "{{ osd_group_name }}" + - "{{ mds_group_name }}" + - "{{ rgw_group_name }}" + - "{{ nfs_group_name }}" + + gather_facts: false # Already gathered previously + + become: true + + tasks: + - name: purge ceph directories for "{{ ansible_hostname }}" + file: + path: "{{ item }}" + state: absent + with_items: + - /etc/ceph + - /var/lib/ceph + - /var/log/ceph + + +- name: purge fetch directory + + hosts: + - localhost + + gather_facts: false + + tasks: + - include_vars: roles/ceph-common/defaults/main.yml + - include_vars: group_vars/all + + - name: purge fetch directory for localhost + file: + path: "{{ fetch_directory }}" + state: absent diff --git a/infrastructure-playbooks/rolling_update.yml b/infrastructure-playbooks/rolling_update.yml new file mode 100644 index 000000000..bb0906875 --- /dev/null +++ b/infrastructure-playbooks/rolling_update.yml @@ -0,0 +1,233 @@ +--- +# This playbook does a rolling update for all the Ceph services +# Change the value of 'serial:' to adjust the number of server to be updated. 
+# +# The five roles that apply to the ceph hosts will be applied: ceph-common, +# ceph-mon, ceph-osd, ceph-mds and ceph-rgw. So any changes to configuration, package updates, etc, +# will be applied as part of the rolling update process. +# + +# /!\ DO NOT FORGET TO CHANGE THE RELEASE VERSION FIRST! /!\ + +- name: confirm whether user really meant to upgrade the cluster + hosts: localhost + + vars_prompt: + - name: ireallymeanit + prompt: Are you sure you want to upgrade the cluster? + default: 'no' + private: no + + tasks: + - name: exit playbook, if user did not mean to upgrade cluster + fail: + msg: > + "Exiting rolling_update.yml playbook, cluster was NOT upgraded. + To upgrade the cluster, either say 'yes' on the prompt or + or use `-e ireallymeanit=yes` on the command line when + invoking the playbook" + when: ireallymeanit != 'yes' + +- hosts: + - mons + - osds + - mdss + - rgws + + become: True + tasks: + - debug: msg="gather facts on all Ceph hosts for following reference" + - name: check if sysvinit + stat: + path: /etc/rc?.d/S??ceph + follow: yes + register: is_sysvinit + + - name: check if upstart + stat: + path: /var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart + register: is_upstart + + - name: check if systemd + command: grep -sq systemd /proc/1/comm + register: is_systemd + + +- hosts: mons + serial: 1 + become: True + vars: + upgrade_ceph_packages: True + mon_group_name: mons + + pre_tasks: + - name: compress the store as much as possible + command: ceph tell mon.{{ ansible_hostname }} compact + + roles: + - ceph-common + - ceph-mon + + post_tasks: + - name: restart ceph mons with upstart + service: + name: ceph-mon + state: restarted + args: id={{ ansible_hostname }} + when: is_upstart.stat.exists == True + + - name: restart ceph mons with sysvinit + service: + name: ceph + state: restarted + when: is_sysvinit.stat.exists == True + + - name: restart ceph mons with systemd + service: + name: ceph-mon@{{ ansible_hostname }} + state: restarted + enabled: 
yes + when: is_systemd + + - name: select a running monitor + set_fact: mon_host={{ item }} + with_items: groups.mons + when: item != inventory_hostname + + - name: waiting for the monitor to join the quorum... + shell: | + ceph -s | grep monmap | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }} + register: result + until: result.rc == 0 + retries: 5 + delay: 10 + delegate_to: "{{ mon_host }}" + + +- hosts: osds + serial: 1 + become: True + vars: + upgrade_ceph_packages: True + osd_group_name: osds + + pre_tasks: + - name: set osd flags + command: ceph osd set {{ item }} + with_items: + - noout + - noscrub + - nodeep-scrub + delegate_to: "{{ groups.mons[0] }}" + + roles: + - ceph-common + - ceph-osd + + post_tasks: + - name: get osd numbers + shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | cut -d '-' -f 2 ; fi" + register: osd_ids + changed_when: false + + - name: restart ceph osds (upstart) + service: + name: ceph-osd-all + state: restarted + when: is_upstart.stat.exists == True + + - name: restart ceph osds (sysvinit) + service: + name: ceph + state: restarted + when: is_sysvinit.stat.exists == True + + - name: restart ceph osds (systemd) + service: + name: ceph-osd@{{item}} + state: restarted + enabled: yes + with_items: "{{ osd_ids.stdout_lines }}" + when: is_systemd + + - name: waiting for clean pgs... 
+ shell: | + test "$(ceph pg stat | sed 's/^.*pgs://;s/active+clean.*//;s/ //')" -eq "$(ceph pg stat | sed 's/pgs.*//;s/^.*://;s/ //')" && ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN" + register: result + until: result.rc == 0 + retries: 10 + delay: 10 + delegate_to: "{{ groups.mons[0] }}" + + - name: unset osd flags + command: ceph osd unset {{ item }} + with_items: + - noout + - noscrub + - nodeep-scrub + delegate_to: "{{ groups.mons[0] }}" + + +- hosts: mdss + serial: 1 + become: True + vars: + upgrade_ceph_packages: True + mds_group_name: mdss + + roles: + - ceph-common + - ceph-mds + + post_tasks: + - name: restart ceph mdss with upstart + service: + name: ceph-mds + state: restarted + args: id={{ ansible_hostname }} + when: is_upstart.stat.exists == True + + - name: restart ceph mdss with sysvinit + service: + name: ceph + state: restarted + args: mds + when: is_sysvinit.stat.exists == True + + - name: restart ceph mdss with systemd + service: + name: ceph-mds@{{ ansible_hostname }} + state: restarted + enabled: yes + when: is_systemd + +- hosts: rgws + serial: 1 + become: True + vars: + upgrade_ceph_packages: True + rgw_group_name: rgws + + roles: + - ceph-common + - ceph-rgw + + post_tasks: + - name: restart ceph rgws with systemd + service: + name: ceph-radosgw@rgw.{{ ansible_hostname }} + state: restarted + enabled: yes + when: is_systemd + + - name: restart ceph rgws with sysvinit + service: + name: radosgw + state: restarted + when: ansible_os_family != 'RedHat' + + - name: restart rados gateway server(s) + service: + name: ceph-radosgw + state: restarted + when: ansible_os_family != 'RedHat' diff --git a/infrastructure-playbooks/shrink-mon.yml b/infrastructure-playbooks/shrink-mon.yml new file mode 100644 index 000000000..93f74c449 --- /dev/null +++ b/infrastructure-playbooks/shrink-mon.yml @@ -0,0 +1,142 @@ +--- +# This playbook shrinks the Ceph monitors from your cluster +# It can remove any number of monitor(s) from the cluster and ALL THEIR DATA 
+# +# Use it like this: +# ansible-playbook shrink-mon.yml -e mon_host=ceph-mon01,ceph-mon02 +# Prompts for confirmation to shrink, defaults to no and +# doesn't shrink the cluster. yes shrinks the cluster. +# +# ansible-playbook -e ireallymeanit=yes|no shrink-mon.yml +# Overrides the prompt using -e option. Can be used in +# automation scripts to avoid interactive prompt. + + +- name: confirm whether user really meant to remove monitor(s) from the ceph cluster + + hosts: + - localhost + + gather_facts: false + become: true + + vars_prompt: + - name: ireallymeanit + prompt: Are you sure you want to shrink the cluster? + default: 'no' + private: no + + tasks: + - include_vars: roles/ceph-common/defaults/main.yml + - include_vars: group_vars/all + + - name: exit playbook, if user did not mean to shrink cluster + fail: + msg: "Exiting shrink-mon playbook, no monitor(s) was/were removed. + To shrink the cluster, either say 'yes' on the prompt or + or use `-e ireallymeanit=yes` on the command line when + invoking the playbook" + when: ireallymeanit != 'yes' + + - name: exit playbook, if no monitor(s) was/were given + fail: + msg: "mon_host must be declared + Exiting shrink-cluster playbook, no monitor(s) was/were removed. + On the command line when invoking the playbook, you can use + -e mon_host=ceph-mon01,ceph-mon02 argument." 
+ when: mon_host is not defined + + - name: test if ceph command exist + command: command -v ceph + changed_when: false + failed_when: false + register: ceph_command + + - name: exit playbook, if ceph command does not exist + debug: + msg: "The ceph command is not available, please install it :(" + run_once: true + when: + - ceph_command.rc != 0 + + - name: exit playbook, if cluster files do not exist + stat: + path: "{{ item }}" + register: ceph_conf_key + with_items: + - /etc/ceph/{{ cluster }}.conf + - /etc/ceph/{{ cluster }}.client.admin.keyring + failed_when: false + + - fail: + msg: "Ceph's configuration file is not present in /etc/ceph" + with_items: "{{ceph_conf_key.results}}" + when: + - item.stat.exists == false + + - name: exit playbook, if can not connect to the cluster + command: timeout 5 ceph --cluster {{ cluster }} health + register: ceph_health + until: ceph_health.stdout.find("HEALTH") > -1 + retries: 5 + delay: 2 + + - name: verify given monitors are reachable + command: ping -c 1 {{ item }} + with_items: "{{mon_host.split(',')}}" + register: mon_reachable + failed_when: false + + - fail: + msg: "One or more monitors are not reachable, please check your /etc/hosts or your DNS" + with_items: "{{mon_reachable.results}}" + when: + - item.rc != 0 + + - name: stop monitor service (systemd) + service: + name: ceph-mon@{{ item }} + state: stopped + enabled: no + with_items: "{{mon_host.split(',')}}" + delegate_to: "{{item}}" + failed_when: false + + - name: purge monitor store + file: + path: /var/lib/ceph/mon/{{ cluster }}-{{ item }} + state: absent + with_items: "{{mon_host.split(',')}}" + delegate_to: "{{item}}" + + - name: remove monitor from the quorum + command: ceph --cluster {{ cluster }} mon remove {{ item }} + failed_when: false + with_items: "{{mon_host.split(',')}}" + + # NOTE (leseb): sorry for the 'sleep' command + # but it will take a couple of seconds for other monitors + # to notice that one member has left. 
+ # 'sleep 5' is not that bad and should be sufficient + - name: verify the monitor is out of the cluster + shell: "sleep 5 && ceph --cluster {{ cluster }} -s | grep monmap | sed 's/.*quorum//' | egrep -sq {{ item }}" + with_items: "{{mon_host.split(',')}}" + failed_when: false + register: ceph_health_mon + + - name: please remove the monitor from your ceph configuration file + debug: + msg: "The monitor(s) has/have been successfully removed from the cluster. + Please remove the monitor(s) entry(ies) from the rest of your ceph configuration files, cluster wide." + run_once: true + with_items: "{{ceph_health_mon.results}}" + when: + - item.rc != 0 + + - name: please remove the monitor from your ceph configuration file + fail: + msg: "Monitor(s) appear(s) to still be part of the cluster, please check what happened." + run_once: true + with_items: "{{ceph_health_mon.results}}" + when: + - item.rc == 0 diff --git a/infrastructure-playbooks/shrink-osd.yml b/infrastructure-playbooks/shrink-osd.yml new file mode 100644 index 000000000..5fb1bd60f --- /dev/null +++ b/infrastructure-playbooks/shrink-osd.yml @@ -0,0 +1,131 @@ +--- +# This playbook shrinks Ceph OSDs. +# It can remove any number of OSD(s) from the cluster and ALL THEIR DATA +# +# Use it like this: +# ansible-playbook shrink-osd.yml -e osd_ids=0,2,6 +# Prompts for confirmation to shrink, defaults to no and +# doesn't shrink the cluster. yes shrinks the cluster. +# +# ansible-playbook -e ireallymeanit=yes|no shrink-osd.yml +# Overrides the prompt using -e option. Can be used in +# automation scripts to avoid interactive prompt. + + +- name: confirm whether user really meant to remove osd(s) from the cluster + + hosts: + - localhost + + gather_facts: false + become: true + + vars_prompt: + - name: ireallymeanit + prompt: Are you sure you want to shrink the cluster? 
+ default: 'no' + private: no + + tasks: + - include_vars: roles/ceph-common/defaults/main.yml + - include_vars: group_vars/all + + - name: exit playbook, if user did not mean to shrink cluster + fail: + msg: "Exiting shrink-osd playbook, no osd(s) was/were removed.. + To shrink the cluster, either say 'yes' on the prompt or + or use `-e ireallymeanit=yes` on the command line when + invoking the playbook" + when: ireallymeanit != 'yes' + + - name: exit playbook, if no osd(s) was/were given + fail: + msg: "osd_ids must be declared + Exiting shrink-osd playbook, no OSD()s was/were removed. + On the command line when invoking the playbook, you can use + -e osd_ids=0,1,2,3 argument." + when: osd_ids is not defined + + - name: test if ceph command exist + command: command -v ceph + changed_when: false + failed_when: false + register: ceph_command + + - name: exit playbook, if ceph command does not exist + debug: + msg: "The ceph command is not available, please install it :(" + run_once: true + when: + - ceph_command.rc != 0 + + - name: exit playbook, if cluster files do not exist + stat: + path: "{{ item }}" + register: ceph_conf_key + with_items: + - /etc/ceph/{{ cluster }}.conf + - /etc/ceph/{{ cluster }}.client.admin.keyring + failed_when: false + + - fail: + msg: "Ceph's configuration file is not present in /etc/ceph" + with_items: "{{ceph_conf_key.results}}" + when: + - item.stat.exists == false + + - name: exit playbook, if can not connect to the cluster + command: timeout 5 ceph --cluster {{ cluster }} health + register: ceph_health + until: ceph_health.stdout.find("HEALTH") > -1 + retries: 5 + delay: 2 + +# NOTE (leseb): just in case, the complex filters mechanism below does not work anymore. +# This will be a quick and easy fix but will require using the shell module. 
+# - name: find the host where the osd(s) is/are running on +# shell: | +# ceph --cluster {{ cluster }} osd find {{ item }} | grep -Po '(?<="ip": ")[^:]*' +# with_items: "{{osd_ids.split(',')}}" +# register: osd_hosts +# + - name: find the host where the osd(s) is/are running on + command: ceph --cluster {{ cluster }} osd find {{ item }} + with_items: "{{osd_ids.split(',')}}" + register: osd_hosts + + - set_fact: ip_item="{{(item.stdout | from_json).ip}}" + with_items: "{{osd_hosts.results}}" + register: ip_result + + - set_fact: ips="{{ ip_result.results | map(attribute='ansible_facts.ip_item') | list }}" + + - set_fact: real_ips="{{ ips | regex_replace(':[0-9][0-9][0-9][0-9]\/[0-9][0-9][0-9][0-9]', '') }}" + + - name: check if ceph admin key exists on the osd nodes + stat: + path: "/etc/ceph/{{ cluster }}.client.admin.keyring" + register: ceph_admin_key + with_items: "{{real_ips}}" + delegate_to: "{{item}}" + failed_when: false + + - fail: + msg: "The Ceph admin key is not present on the OSD node, please add it and remove it after the playbook is done." + with_items: "{{ceph_admin_key.results}}" + when: + - item.stat.exists == false + + - name: deactivating osd(s) + command: ceph-disk deactivate --cluster {{ cluster }} --deactivate-by-id {{ item.0 }} --mark-out + with_together: + - "{{osd_ids.split(',')}}" + - "{{real_ips}}" + delegate_to: "{{item.1}}" + + - name: destroying osd(s) + command: ceph-disk destroy --cluster {{ cluster }} --destroy-by-id {{ item.0 }} --zap + with_together: + - "{{osd_ids.split(',')}}" + - "{{real_ips}}" + delegate_to: "{{item.1}}" diff --git a/infrastructure-playbooks/take-over-existing-cluster.yml b/infrastructure-playbooks/take-over-existing-cluster.yml new file mode 100644 index 000000000..ce4eaa48f --- /dev/null +++ b/infrastructure-playbooks/take-over-existing-cluster.yml @@ -0,0 +1,49 @@ +--- +# NOTE (leseb): +# The playbook aims to takeover a cluster that was not configured with +# ceph-ansible. 
+# +# The procedure is as follows: +# +# 1. Install Ansible and add your monitors and osds hosts in it. For more detailed information you can read the [Ceph Ansible Wiki](https://github.com/ceph/ceph-ansible/wiki) +# 2. Set `generate_fsid: false` in `group_vars` +# 3. Get your current cluster fsid with `ceph fsid` and set `fsid` accordingly in `group_vars` +# 4. Run the playbook called: `take-over-existing-cluster.yml` like this `ansible-playbook take-over-existing-cluster.yml`. +# 5. Eventually run Ceph Ansible to validate everything by doing: `ansible-playbook site.yml`. + +- hosts: mons + become: True + vars_files: + - roles/ceph-common/defaults/main.yml + - group_vars/all + roles: + - ceph-fetch-keys + +- hosts: all + become: true + + tasks: + - include_vars: roles/ceph-common/defaults/main.yml + - include_vars: group_vars/all + + - name: get the name of the existing ceph cluster + shell: "ls /etc/ceph/*.conf" + changed_when: false + register: ceph_conf + + - name: stat ceph.conf + stat: + path: "{{ ceph_conf.stdout }}" + register: ceph_conf_stat + + - name: generate ceph configuration file + action: config_template + args: + src: "roles/ceph-common/templates/ceph.conf.j2" + dest: "{{ ceph_conf.stdout }}" + owner: "{{ ceph_conf_stat.stat.pw_name }}" + group: "{{ ceph_conf_stat.stat.gr_name }}" + mode: "{{ ceph_conf_stat.stat.mode }}" + config_overrides: "{{ ceph_conf_overrides }}" + config_type: ini + diff --git a/localrepo-site.yml.sample b/localrepo-site.yml.sample deleted file mode 100644 index d7fdf8320..000000000 --- a/localrepo-site.yml.sample +++ /dev/null @@ -1,56 +0,0 @@ ---- -# Sample Playbook for local mirrors. -# Additional/optional step to generate repos.d file for local mirrors. 
-# Defines deployment design and assigns role to server groups - -- hosts: all - max_fail_percentage: 0 - become: True - vars: - repolist: - - { src: "//localmirror_ceph.repo", dest: "/etc/yum.repos.d/ceph.repo" } - tasks: - - name: Copy User provided repo files to /etc/yum.repos.d/ - copy: - src: "{{ item.src }}" - dest: "{{ item.dest }}" - owner: root - group: root - with_items: - - "{{ repolist }}" - -- hosts: mons - become: True - roles: - - ceph-mon - -- hosts: agents - become: True - roles: - - ceph-agent - -- hosts: osds - become: True - roles: - - ceph-osd - -- hosts: mdss - become: True - roles: - - ceph-mds - -- hosts: rgws - become: True - roles: - - ceph-rgw - -- hosts: nfss - become: True - roles: - - ceph-nfs - -- hosts: restapis - become: True - roles: - - ceph-restapi - diff --git a/osd-configure.yml b/osd-configure.yml deleted file mode 100644 index a193dca10..000000000 --- a/osd-configure.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- -# This playbook is used to add a new OSD to -# an existing cluster without the need for running -# the ceph-common or ceph-mon role again against all -# of the existing monitors. -# -# Ensure that all monitors are present in the mons -# group in your inventory so that the ceph.conf is -# created correctly for the new OSD. -- hosts: mons - become: True - roles: - - ceph-fetch-keys - -- hosts: osds - become: True - roles: - - ceph-osd diff --git a/purge-cluster.yml b/purge-cluster.yml deleted file mode 100644 index d5e001013..000000000 --- a/purge-cluster.yml +++ /dev/null @@ -1,453 +0,0 @@ ---- -# This playbook purges Ceph -# It removes: packages, configuration files and ALL THE DATA -# -# Use it like this: -# ansible-playbook purge-cluster.yml -# Prompts for confirmation to purge, defaults to no and -# doesn't purge the cluster. yes purges the cluster. -# -# ansible-playbook -e ireallymeanit=yes|no purge-cluster.yml -# Overrides the prompt using -e option. Can be used in -# automation scripts to avoid interactive prompt. 
- -- name: confirm whether user really meant to purge the cluster - hosts: localhost - - vars_prompt: - - name: ireallymeanit - prompt: Are you sure you want to purge the cluster? - default: 'no' - private: no - - tasks: - - name: exit playbook, if user did not mean to purge cluster - fail: - msg: > - "Exiting purge-cluster playbook, cluster was NOT purged. - To purge the cluster, either say 'yes' on the prompt or - or use `-e ireallymeanit=yes` on the command line when - invoking the playbook" - when: ireallymeanit != 'yes' - -- name: stop ceph cluster - hosts: - - mons - - osds - - mdss - - rgws - - nfss - - become: yes - - vars: - osd_group_name: osds - mon_group_name: mons - rgw_group_name: rgws - mds_group_name: mdss - nfs_group_name: nfss - rbdmirror_group_name: rbdmirrors - -# When set to true both groups of packages are purged. -# This can cause problem with qemu-kvm - purge_all_packages: true - -# When set to true and raw _multi_journal is used then block devices are also zapped - zap_block_devs: true - - ceph_packages: - - ceph - - ceph-common - - ceph-fs-common - - ceph-fuse - - ceph-mds - - ceph-release - - ceph-radosgw - - ceph_remaining_packages: - - libcephfs1 - - librados2 - - libradosstriper1 - - librbd1 - - python-cephfs - - python-rados - - python-rbd - - cluster: ceph # name of the cluster - monitor_name: "{{ ansible_hostname }}" - mds_name: "{{ ansible_hostname }}" - - - handlers: - - name: restart machine - shell: sleep 2 && shutdown -r now "Ansible updates triggered" - async: 1 - poll: 0 - ignore_errors: true - - - name: wait for server to boot - local_action: wait_for port=22 host={{ inventory_hostname }} state=started delay=10 timeout=400 - - - name: remove data - file: - path: /var/lib/ceph - state: absent - - tasks: - - name: check for a device list - fail: - msg: "OSD automatic discovery was detected, purge cluster does not support this scenario. 
If you want to purge the cluster, manually provide the list of devices in group_vars/osds using the devices variable." - when: - osd_group_name in group_names and - devices is not defined and - osd_auto_discovery - - - name: get osd numbers - shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | cut -d '-' -f 2 ; fi" - register: osd_ids - changed_when: false - - - name: are we using systemd - shell: "if [ -d /usr/lib/systemd ] ; then find /usr/lib/systemd/system -name 'ceph*' | wc -l ; else echo 0 ; fi" - register: systemd_unit_files - -# after Hammer release - - - name: stop ceph.target with systemd - service: - name: ceph.target - state: stopped - enabled: no - when: - ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" - - - name: stop ceph-osd with systemd - service: - name: ceph-osd@{{item}} - state: stopped - enabled: no - with_items: "{{ osd_ids.stdout_lines }}" - when: - ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - osd_group_name in group_names - - - name: stop ceph mons with systemd - service: - name: ceph-mon@{{ ansible_hostname }} - state: stopped - enabled: no - when: - ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - mon_group_name in group_names - - - name: stop ceph mdss with systemd - service: - name: ceph-mds@{{ ansible_hostname }} - state: stopped - when: - ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - mds_group_name in group_names - - - name: stop ceph rgws with systemd - service: - name: ceph-radosgw@rgw.{{ ansible_hostname }} - state: stopped - when: - ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - rgw_group_name in group_names - - - name: stop ceph nfss with systemd - service: - name: nfs-ganesha - state: stopped - when: - ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - nfs_group_name in group_names - - - name: stop ceph rbd mirror with systemd - service: - name: 
ceph-rbd-mirror@admin.service - state: stopped - when: - ansible_os_family == 'RedHat' and - systemd_unit_files.stdout != "0" and - rbdmirror_group_name in group_names - -# before infernalis release, using sysvinit scripts -# we use this test so we do not have to know which RPM contains the boot script -# or where it is placed. - - - name: stop ceph osds - shell: "service ceph status osd ; if [ $? == 0 ] ; then service ceph stop osd ; else echo ; fi" - when: - ansible_os_family == 'RedHat' and - osd_group_name in group_names - - - name: stop ceph mons - shell: "service ceph status mon ; if [ $? == 0 ] ; then service ceph stop mon ; else echo ; fi" - when: - ansible_os_family == 'RedHat' and - mon_group_name in group_names - - - name: stop ceph mdss - shell: "service ceph status mds ; if [ $? == 0 ] ; then service ceph stop mds ; else echo ; fi" - when: - ansible_os_family == 'RedHat' and - mds_group_name in group_names - - - name: stop ceph rgws - shell: "service ceph-radosgw status ; if [ $? == 0 ] ; then service ceph-radosgw stop ; else echo ; fi" - when: - ansible_os_family == 'RedHat' and - rgw_group_name in group_names - - - name: stop ceph nfss - shell: "service nfs-ganesha status ; if [ $? 
== 0 ] ; then service nfs-ganesha stop ; else echo ; fi" - when: - ansible_os_family == 'RedHat' and - nfs_group_name in group_names - -# Ubuntu 14.04 - - name: stop ceph osds on ubuntu - shell: | - for id in $(ls /var/lib/ceph/osd/ |grep -oh '[0-9]*'); do - initctl stop ceph-osd cluster={{ cluster }} id=$id - done - failed_when: false - when: - ansible_distribution == 'Ubuntu' and - osd_group_name in group_names - with_items: "{{ osd_ids.stdout_lines }}" - - - name: stop ceph mons on ubuntu - command: initctl stop ceph-mon cluster={{ cluster }} id={{ monitor_name }} - failed_when: false - when: - ansible_distribution == 'Ubuntu' and - mon_group_name in group_names - - - name: stop ceph mdss on ubuntu - command: initctl stop ceph-mds cluster={{ cluster }} id={{ mds_name }} - failed_when: false - when: - ansible_distribution == 'Ubuntu' and - mds_group_name in group_names - - - name: stop ceph rgws on ubuntu - command: initctl stop radosgw cluster={{ cluster }} id={{ ansible_hostname }} - failed_when: false - when: - ansible_distribution == 'Ubuntu' and - rgw_group_name in group_names - - - name: stop ceph nfss on ubuntu - command: initctl stop nfs-ganesha - failed_when: false - when: - ansible_distribution == 'Ubuntu' and - nfs_group_name in group_names - - - name: stop ceph rbd mirror on ubuntu - command: initctl stop ceph-rbd-mirorr cluster={{ cluster }} id=admin - failed_when: false - when: - ansible_distribution == 'Ubuntu' and - rbdmirror_group_name in group_names - - - name: check for anything running ceph - shell: "ps awux | grep -- [c]eph-" - register: check_for_running_ceph - failed_when: check_for_running_ceph.rc == 0 - - - name: see if ceph-disk-created data partitions are present - shell: "ls /dev/disk/by-partlabel | grep -q 'ceph\\\\x20data'" - failed_when: false - register: ceph_data_partlabels - - - name: see if ceph-disk-created journal partitions are present - shell: "ls /dev/disk/by-partlabel | grep -q 'ceph\\\\x20journal'" - failed_when: false - 
register: ceph_journal_partlabels - - - name: get osd data mount points - shell: "(grep /var/lib/ceph/osd /proc/mounts || echo -n) | awk '{ print $2 }'" - register: mounted_osd - changed_when: false - - - name: drop all cache - shell: "sync && sleep 1 && echo 3 > /proc/sys/vm/drop_caches" - when: - osd_group_name in group_names - - - name: umount osd data partition - shell: umount {{ item }} - with_items: - - "{{ mounted_osd.stdout_lines }}" - when: - osd_group_name in group_names - - - name: remove osd mountpoint tree - file: - path: /var/lib/ceph/osd/ - state: absent - register: remove_osd_mountpoints - ignore_errors: true - when: - osd_group_name in group_names - - - name: remove monitor store and bootstrap keys - file: - path: /var/lib/ceph/ - state: absent - when: - mon_group_name in group_names - - - name: is reboot needed - local_action: shell echo requesting reboot - notify: - - restart machine - - wait for server to boot - - remove data - when: - osd_group_name in group_names and - remove_osd_mountpoints.failed is defined - - - name: see if ceph-disk is installed - shell: "which ceph-disk" - failed_when: false - register: ceph_disk_present - - - name: zap osd disks - shell: ceph-disk zap "{{ item }}" - with_items: "{{ devices | default([]) }}" - when: - osd_group_name in group_names and - ceph_disk_present.rc == 0 and - ceph_data_partlabels.rc == 0 and - zap_block_devs - - - name: zap journal devices - shell: ceph-disk zap "{{ item }}" - with_items: "{{ raw_journal_devices|default([])|unique }}" - when: - osd_group_name in group_names and - ceph_disk_present.rc == 0 and - ceph_journal_partlabels.rc == 0 and - zap_block_devs and - raw_multi_journal - - - name: purge ceph packages with yum - yum: - name: "{{ item }}" - state: absent - with_items: - - "{{ ceph_packages }}" - when: - ansible_pkg_mgr == 'yum' - - - name: purge ceph packages with dnf - dnf: - name: "{{ item }}" - state: absent - with_items: - - "{{ ceph_packages }}" - when: - ansible_pkg_mgr == 
'dnf' - - - name: purge ceph packages with apt - apt: - name: "{{ item }}" - state: absent - with_items: - - "{{ ceph_packages }}" - when: - ansible_pkg_mgr == 'apt' - - - name: purge remaining ceph packages with yum - yum: - name: "{{ item }}" - state: absent - with_items: - - "{{ ceph_remaining_packages }}" - when: - ansible_pkg_mgr == 'yum' and - purge_all_packages == true - - - name: purge remaining ceph packages with dnf - dnf: - name: "{{ item }}" - state: absent - with_items: - - "{{ ceph_remaining_packages }}" - when: - ansible_pkg_mgr == 'dnf' and - purge_all_packages == true - - - name: purge remaining ceph packages with apt - apt: - name: "{{ item }}" - state: absent - with_items: - - "{{ ceph_remaining_packages }}" - when: - ansible_pkg_mgr == 'apt' and - purge_all_packages == true - - - name: remove config - file: - path: /etc/ceph - state: absent - - - name: remove logs - file: - path: /var/log/ceph - state: absent - - - name: remove from SysV - shell: "update-rc.d -f ceph remove" - when: - ansible_distribution == 'Ubuntu' - - - name: remove Upstart nad SysV files - shell: "find /etc -name '*ceph*' -delete" - when: - ansible_distribution == 'Ubuntu' - - - name: remove Upstart and apt logs and cache - shell: "find /var -name '*ceph*' -delete" - when: - ansible_distribution == 'Ubuntu' - - - name: request data removal - local_action: shell echo requesting data removal - become: false - notify: - - remove data - - - name: purge dnf cache - command: dnf clean all - when: - ansible_pkg_mgr == 'dnf' - - - name: purge RPM cache in /tmp - file: - path: /tmp/rh-storage-repo - state: absent - - - name: clean apt - shell: apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - when: - ansible_pkg_mgr == 'apt' - - - name: purge rh_storage.repo file in /etc/yum.repos.d - file: - path: /etc/yum.repos.d/rh_storage.repo - state: absent - when: - ansible_os_family == 'RedHat' diff --git a/purge-docker-cluster.yml b/purge-docker-cluster.yml deleted file mode 
100644 index 3f33cbdac..000000000 --- a/purge-docker-cluster.yml +++ /dev/null @@ -1,540 +0,0 @@ ---- -# This playbook purges a containerized Ceph cluster -# It removes: packages, containers, configuration files and ALL THE DATA - -- name: confirm whether user really meant to purge the cluster - - hosts: - - localhost - - gather_facts: false - - vars_prompt: - - name: ireallymeanit - prompt: Are you sure you want to purge the cluster? - default: 'no' - private: no - - - name: remove_packages - prompt: > - If --skip-tags=with_pkg is not set docker packages - and more will be uninstalled from non-atomic hosts. - Do you want to continue? - default: 'no' - private: no - - tasks: - - name: exit playbook, if user did not mean to purge cluster - fail: - msg: > - "Exiting purge-docker-cluster playbook, cluster was NOT purged. - To purge the cluster, either say 'yes' on the prompt or - or use `-e ireallymeanit=yes` on the command line when - invoking the playbook" - when: ireallymeanit != 'yes' - - - name: exit playbook, if user did not mean to remove packages - fail: - msg: > - "Exiting purge-docker-cluster playbook. No packages were removed. - To skip removing packages use --skip-tag=with_pkg. 
To continue - with removing packages, do not specify --skip-tag=with_pkg and - either say 'yes' on the prompt or use `-e remove_packages=yes` - on the command line when invoking the playbook" - when: remove_packages != 'yes' - - -- name: purge ceph mds cluster - - vars: - mds_group_name: mdss - - hosts: - - "{{ mds_group_name }}" - - become: true - - tasks: - - include_vars: roles/ceph-common/defaults/main.yml - - include_vars: roles/ceph-mds/defaults/main.yml - - include_vars: group_vars/all - - - name: disable ceph mds service - service: - name: "ceph-mds@{{ ansible_hostname }}" - state: stopped - enabled: no - ignore_errors: true - - - name: remove ceph mds container - docker: - image: "{{ ceph_mds_docker_username }}/{{ ceph_mds_docker_imagename }}:{{ ceph_mds_docker_image_tag }}" - name: "{{ ansible_hostname }}" - state: absent - ignore_errors: true - - - name: remove ceph mds service - file: - path: /etc/systemd/system/ceph-mds@.service - state: absent - - - name: remove ceph mds image - docker_image: - state: absent - name: "{{ ceph_mds_docker_username }}/{{ ceph_mds_docker_imagename }}" - tag: "{{ ceph_mds_docker_image_tag }}" - tags: - remove_img - - -- name: purge ceph rgw cluster - - vars: - rgw_group_name: rgws - - hosts: - - "{{ rgw_group_name }}" - - become: true - - tasks: - - include_vars: roles/ceph-common/defaults/main.yml - - include_vars: roles/ceph-rgw/defaults/main.yml - - include_vars: group_vars/all - - - name: disable ceph rgw service - service: - name: "ceph-rgw@{{ ansible_hostname }}" - state: stopped - enabled: no - ignore_errors: true - - - name: remove ceph rgw container - docker: - image: "{{ ceph_rgw_docker_username }}/{{ ceph_rgw_docker_imagename }}:{{ ceph_rgw_docker_image_tag }}" - name: "{{ ansible_hostname }}" - state: absent - ignore_errors: true - - - name: remove ceph rgw service - file: - path: /etc/systemd/system/ceph-rgw@.service - state: absent - - - name: remove ceph rgw image - docker_image: - state: absent - name: "{{ 
ceph_rgw_docker_username }}/{{ ceph_rgw_docker_imagename }}" - tag: "{{ ceph_rgw_docker_image_tag }}" - tags: - remove_img - - -- name: purge ceph nfs cluster - - vars: - nfs_group_name: nfss - - hosts: - - "{{ nfs_group_name }}" - - become: true - - tasks: - - include_vars: roles/ceph-common/defaults/main.yml - - include_vars: roles/ceph-nfs/defaults/main.yml - - include_vars: group_vars/all - - - name: disable ceph nfs service - service: - name: "ceph-nfs@{{ ansible_hostname }}" - state: stopped - enabled: no - ignore_errors: true - - - name: remove ceph nfs container - docker: - image: "{{ ceph_nfs_docker_username }}/{{ ceph_nfs_docker_imagename }}:{{ ceph_nfs_docker_image_tag }}" - name: "{{ ansible_hostname }}" - state: absent - ignore_errors: true - - - name: remove ceph nfs service - file: - path: /etc/systemd/system/ceph-nfs@.service - state: absent - - - name: remove ceph nfs directories for "{{ ansible_hostname }}" - file: - path: "{{ item }}" - state: absent - with_items: - - /etc/ganesha - - /var/lib/nfs/ganesha - - /var/run/ganesha - - - name: remove ceph nfs image - docker_image: - state: absent - name: "{{ ceph_nfs_docker_username }}/{{ ceph_nfs_docker_imagename }}" - tag: "{{ ceph_nfs_docker_image_tag }}" - tags: - remove_img - - -- name: purge ceph osd cluster - - vars: - osd_group_name: osds - - hosts: - - "{{ osd_group_name }}" - - become: true - - tasks: - - include_vars: roles/ceph-common/defaults/main.yml - - include_vars: roles/ceph-osd/defaults/main.yml - - include_vars: group_vars/all - - - name: disable ceph osd service - service: - name: "ceph-osd@{{ item | basename }}" - state: stopped - enabled: no - with_items: "{{ ceph_osd_docker_devices }}" - ignore_errors: true - - - name: remove ceph osd prepare container - docker: - image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" - name: "{{ ansible_hostname }}-osd-prepare-{{ item | regex_replace('/', '') }}" - state: absent - with_items: "{{ 
ceph_osd_docker_devices }}" - ignore_errors: true - - - name: remove ceph osd container - docker: - image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" - name: "{{ ansible_hostname }}-osd-{{ item | regex_replace('/', '') }}" - state: absent - with_items: "{{ ceph_osd_docker_devices }}" - ignore_errors: true - - - name: zap ceph osd disk - docker: - image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" - name: "{{ ansible_hostname }}-osd-zap-{{ item | regex_replace('/', '') }}" - net: host - pid: host - state: started - privileged: yes - env: "CEPH_DAEMON=zap_device,OSD_DEVICE={{ item }}" - volumes: "/var/lib/ceph:/var/lib/ceph,/etc/ceph:/etc/ceph,/dev:/dev,/run:/run" - with_items: "{{ ceph_osd_docker_devices }}" - - - name: remove ceph osd zap disk container - docker: - image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" - name: "{{ ansible_hostname }}-osd-zap-{{ item | regex_replace('/', '') }}" - state: absent - with_items: "{{ ceph_osd_docker_devices }}" - - # zap twice - - name: zap ceph osd disk - docker: - image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" - name: "{{ ansible_hostname }}-osd-zap-{{ item | regex_replace('/', '') }}" - net: host - pid: host - state: started - privileged: yes - env: "CEPH_DAEMON=zap_device,OSD_DEVICE={{ item }}" - volumes: "/var/lib/ceph:/var/lib/ceph,/etc/ceph:/etc/ceph,/dev:/dev,/run:/run" - with_items: "{{ ceph_osd_docker_devices }}" - - - name: remove ceph osd zap disk container - docker: - image: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}:{{ ceph_osd_docker_image_tag }}" - name: "{{ ansible_hostname }}-osd-zap-{{ item | regex_replace('/', '') }}" - state: absent - with_items: "{{ ceph_osd_docker_devices }}" - - - name: remove ceph osd service - file: - path: /etc/systemd/system/ceph-osd@.service - 
state: absent - - - name: remove ceph osd image - docker_image: - state: absent - name: "{{ ceph_osd_docker_username }}/{{ ceph_osd_docker_imagename }}" - tag: "{{ ceph_osd_docker_image_tag }}" - tags: - remove_img - - -- name: purge ceph mon cluster - - vars: - mon_group_name: mons - - hosts: - - "{{ mon_group_name }}" - - become: true - - tasks: - - include_vars: roles/ceph-common/defaults/main.yml - - include_vars: roles/ceph-mon/defaults/main.yml - - include_vars: roles/ceph-restapi/defaults/main.yml - - include_vars: group_vars/all - - - name: disable ceph mon service - service: - name: "ceph-mon@{{ ansible_hostname }}" - state: stopped - enabled: no - ignore_errors: true - - - name: remove ceph mon container - docker: - image: "{{ ceph_mon_docker_username }}/{{ ceph_mon_docker_imagename }}:{{ ceph_mon_docker_image_tag }}" - name: "{{ ansible_hostname }}" - state: absent - ignore_errors: true - - - name: remove restapi container - docker: - image: "{{ ceph_restapi_docker_username }}/{{ ceph_restapi_docker_imagename }}:{{ ceph_restapi_docker_image_tag }}" - name: "{{ ansible_hostname }}-ceph-restapi" - state: absent - - - name: remove ceph mon service - file: - path: /etc/systemd/system/ceph-mon@.service - state: absent - - - name: remove ceph mon image - docker_image: - state: absent - name: "{{ ceph_mon_docker_username }}/{{ ceph_mon_docker_imagename }}" - tag: "{{ ceph_mon_docker_image_tag }}" - tags: - remove_img - - -- name: remove installed packages - - vars: - mon_group_name: mons - osd_group_name: osds - mds_group_name: mdss - rgw_group_name: rgws - nfs_group_name: nfss - - hosts: - - "{{ mon_group_name }}" - - "{{ osd_group_name }}" - - "{{ mds_group_name }}" - - "{{ rgw_group_name }}" - - "{{ nfs_group_name }}" - - become: true - - tags: - with_pkg - - tasks: - - name: check if it is Atomic host - stat: path=/run/ostree-booted - register: stat_ostree - - - name: set fact for using Atomic host - set_fact: - is_atomic: "{{ stat_ostree.stat.exists }}" - 
- - name: stop docker service - service: - name: docker - state: stopped - enabled: no - when: not is_atomic - - - name: remove docker-py - pip: - name: docker-py - version: 1.1.0 - state: absent - when: - ansible_version['full'] | version_compare('2.1.0.0', '<') and - not is_atomic - - - name: remove docker-py - pip: - name: docker-py - state: absent - when: - ansible_version['full'] | version_compare('2.1.0.0', '>=') and - not is_atomic - - - name: remove six - pip: - name: six - version: 1.9.0 - state: absent - when: not is_atomic - - - name: remove pip and docker on ubuntu - apt: - name: "{{ item }}" - state: absent - update_cache: yes - autoremove: yes - with_items: - - python-pip - - docker - - docker.io - when: ansible_distribution == 'Ubuntu' - - - name: remove pip and docker on debian - apt: - name: "{{ item }}" - state: absent - update_cache: yes - autoremove: yes - with_items: - - python-pip - - docker-engine - when: ansible_distribution == 'Debian' - - - name: remove epel-release on redhat - yum: - name: epel-release - state: absent - when: - ansible_os_family == 'RedHat' and - not is_atomic - - - name: remove pip on redhat - yum: - name: "{{ item }}" - state: absent - with_items: - - python-pip - when: - ansible_os_family == 'RedHat' and - ansible_pkg_mgr == "yum" and - not is_atomic - - - name: remove docker-engine on redhat - yum: - name: "{{ item }}" - state: absent - with_items: - - docker-engine - when: - ansible_os_family == 'RedHat' and - ansible_pkg_mgr == "yum" and - not is_atomic - - # for CentOS - - name: remove docker on redhat - yum: - name: "{{ item }}" - state: absent - with_items: - - docker - when: - ansible_os_family == 'RedHat' and - ansible_pkg_mgr == "yum" and - not is_atomic - - - name: remove pip and docker on redhat - dnf: - name: "{{ item }}" - state: absent - with_items: - - python-pip - - docker-engine - - docker - when: - ansible_os_family == 'RedHat' and - ansible_pkg_mgr == "dnf" and - not is_atomic - - - name: remove 
package dependencies on redhat - command: yum -y autoremove - when: - ansible_os_family == 'RedHat' and - ansible_pkg_mgr == "yum" and - not is_atomic - - - name: remove package dependencies on redhat again - command: yum -y autoremove - when: - ansible_os_family == 'RedHat' and - ansible_pkg_mgr == "yum" and - not is_atomic - - - name: remove package dependencies on redhat - command: dnf -y autoremove - when: - ansible_os_family == 'RedHat' and - ansible_pkg_mgr == "dnf" and - not is_atomic - - - name: remove package dependencies on redhat again - command: dnf -y autoremove - when: - ansible_os_family == 'RedHat' and - ansible_pkg_mgr == "dnf" and - not is_atomic - - -- name: purge ceph directories - - vars: - mon_group_name: mons - osd_group_name: osds - mds_group_name: mdss - rgw_group_name: rgws - nfs_group_name: nfss - - hosts: - - "{{ mon_group_name }}" - - "{{ osd_group_name }}" - - "{{ mds_group_name }}" - - "{{ rgw_group_name }}" - - "{{ nfs_group_name }}" - - gather_facts: false # Already gathered previously - - become: true - - tasks: - - name: purge ceph directories for "{{ ansible_hostname }}" - file: - path: "{{ item }}" - state: absent - with_items: - - /etc/ceph - - /var/lib/ceph - - /var/log/ceph - - -- name: purge fetch directory - - hosts: - - localhost - - gather_facts: false - - tasks: - - include_vars: roles/ceph-common/defaults/main.yml - - include_vars: group_vars/all - - - name: purge fetch directory for localhost - file: - path: "{{ fetch_directory }}" - state: absent diff --git a/rolling_update.yml b/rolling_update.yml deleted file mode 100644 index bb0906875..000000000 --- a/rolling_update.yml +++ /dev/null @@ -1,233 +0,0 @@ ---- -# This playbook does a rolling update for all the Ceph services -# Change the value of 'serial:' to adjust the number of servers to be updated. -# -# The five roles that apply to the ceph hosts will be applied: ceph-common, -# ceph-mon, ceph-osd, ceph-mds and ceph-rgw.
So any changes to configuration, package updates, etc, -# will be applied as part of the rolling update process. -# - -# /!\ DO NOT FORGET TO CHANGE THE RELEASE VERSION FIRST! /!\ - -- name: confirm whether user really meant to upgrade the cluster - hosts: localhost - - vars_prompt: - - name: ireallymeanit - prompt: Are you sure you want to upgrade the cluster? - default: 'no' - private: no - - tasks: - - name: exit playbook, if user did not mean to upgrade cluster - fail: - msg: > - "Exiting rolling_update.yml playbook, cluster was NOT upgraded. - To upgrade the cluster, either say 'yes' on the prompt or - or use `-e ireallymeanit=yes` on the command line when - invoking the playbook" - when: ireallymeanit != 'yes' - -- hosts: - - mons - - osds - - mdss - - rgws - - become: True - tasks: - - debug: msg="gather facts on all Ceph hosts for following reference" - - name: check if sysvinit - stat: - path: /etc/rc?.d/S??ceph - follow: yes - register: is_sysvinit - - - name: check if upstart - stat: - path: /var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart - register: is_upstart - - - name: check if systemd - command: grep -sq systemd /proc/1/comm - register: is_systemd - - -- hosts: mons - serial: 1 - become: True - vars: - upgrade_ceph_packages: True - mon_group_name: mons - - pre_tasks: - - name: compress the store as much as possible - command: ceph tell mon.{{ ansible_hostname }} compact - - roles: - - ceph-common - - ceph-mon - - post_tasks: - - name: restart ceph mons with upstart - service: - name: ceph-mon - state: restarted - args: id={{ ansible_hostname }} - when: is_upstart.stat.exists == True - - - name: restart ceph mons with sysvinit - service: - name: ceph - state: restarted - when: is_sysvinit.stat.exists == True - - - name: restart ceph mons with systemd - service: - name: ceph-mon@{{ ansible_hostname }} - state: restarted - enabled: yes - when: is_systemd - - - name: select a running monitor - set_fact: mon_host={{ item }} - with_items: 
groups.mons - when: item != inventory_hostname - - - name: waiting for the monitor to join the quorum... - shell: | - ceph -s | grep monmap | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }} - register: result - until: result.rc == 0 - retries: 5 - delay: 10 - delegate_to: "{{ mon_host }}" - - -- hosts: osds - serial: 1 - become: True - vars: - upgrade_ceph_packages: True - osd_group_name: osds - - pre_tasks: - - name: set osd flags - command: ceph osd set {{ item }} - with_items: - - noout - - noscrub - - nodeep-scrub - delegate_to: "{{ groups.mons[0] }}" - - roles: - - ceph-common - - ceph-osd - - post_tasks: - - name: get osd numbers - shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | cut -d '-' -f 2 ; fi" - register: osd_ids - changed_when: false - - - name: restart ceph osds (upstart) - service: - name: ceph-osd-all - state: restarted - when: is_upstart.stat.exists == True - - - name: restart ceph osds (sysvinit) - service: - name: ceph - state: restarted - when: is_sysvinit.stat.exists == True - - - name: restart ceph osds (systemd) - service: - name: ceph-osd@{{item}} - state: restarted - enabled: yes - with_items: "{{ osd_ids.stdout_lines }}" - when: is_systemd - - - name: waiting for clean pgs... 
- shell: | - test "$(ceph pg stat | sed 's/^.*pgs://;s/active+clean.*//;s/ //')" -eq "$(ceph pg stat | sed 's/pgs.*//;s/^.*://;s/ //')" && ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN" - register: result - until: result.rc == 0 - retries: 10 - delay: 10 - delegate_to: "{{ groups.mons[0] }}" - - - name: unset osd flags - command: ceph osd unset {{ item }} - with_items: - - noout - - noscrub - - nodeep-scrub - delegate_to: "{{ groups.mons[0] }}" - - -- hosts: mdss - serial: 1 - become: True - vars: - upgrade_ceph_packages: True - mds_group_name: mdss - - roles: - - ceph-common - - ceph-mds - - post_tasks: - - name: restart ceph mdss with upstart - service: - name: ceph-mds - state: restarted - args: id={{ ansible_hostname }} - when: is_upstart.stat.exists == True - - - name: restart ceph mdss with sysvinit - service: - name: ceph - state: restarted - args: mds - when: is_sysvinit.stat.exists == True - - - name: restart ceph mdss with systemd - service: - name: ceph-mds@{{ ansible_hostname }} - state: restarted - enabled: yes - when: is_systemd - -- hosts: rgws - serial: 1 - become: True - vars: - upgrade_ceph_packages: True - rgw_group_name: rgws - - roles: - - ceph-common - - ceph-rgw - - post_tasks: - - name: restart ceph rgws with systemd - service: - name: ceph-radosgw@rgw.{{ ansible_hostname }} - state: restarted - enabled: yes - when: is_systemd - - - name: restart ceph rgws with sysvinit - service: - name: radosgw - state: restarted - when: ansible_os_family != 'RedHat' - - - name: restart rados gateway server(s) - service: - name: ceph-radosgw - state: restarted - when: ansible_os_family != 'RedHat' diff --git a/shrink-mon.yml b/shrink-mon.yml deleted file mode 100644 index 93f74c449..000000000 --- a/shrink-mon.yml +++ /dev/null @@ -1,142 +0,0 @@ ---- -# This playbook shrinks the Ceph monitors from your cluster -# It can remove any number of monitor(s) from the cluster and ALL THEIR DATA -# -# Use it like this: -# ansible-playbook shrink-mon.yml -e 
mon_host=ceph-mon01,ceph-mon02 -# Prompts for confirmation to shrink, defaults to no and -# doesn't shrink the cluster. yes shrinks the cluster. -# -# ansible-playbook -e ireallymeanit=yes|no shrink-mon.yml -# Overrides the prompt using -e option. Can be used in -# automation scripts to avoid interactive prompt. - - -- name: confirm whether user really meant to remove monitor(s) from the ceph cluster - - hosts: - - localhost - - gather_facts: false - become: true - - vars_prompt: - - name: ireallymeanit - prompt: Are you sure you want to shrink the cluster? - default: 'no' - private: no - - tasks: - - include_vars: roles/ceph-common/defaults/main.yml - - include_vars: group_vars/all - - - name: exit playbook, if user did not mean to shrink cluster - fail: - msg: "Exiting shrink-mon playbook, no monitor(s) was/were removed. - To shrink the cluster, either say 'yes' on the prompt or - or use `-e ireallymeanit=yes` on the command line when - invoking the playbook" - when: ireallymeanit != 'yes' - - - name: exit playbook, if no monitor(s) was/were given - fail: - msg: "mon_host must be declared - Exiting shrink-cluster playbook, no monitor(s) was/were removed. - On the command line when invoking the playbook, you can use - -e mon_host=ceph-mon01,ceph-mon02 argument."
- when: mon_host is not defined - - - name: test if ceph command exist - command: command -v ceph - changed_when: false - failed_when: false - register: ceph_command - - - name: exit playbook, if ceph command does not exist - debug: - msg: "The ceph command is not available, please install it :(" - run_once: true - when: - - ceph_command.rc != 0 - - - name: exit playbook, if cluster files do not exist - stat: - path: "{{ item }}" - register: ceph_conf_key - with_items: - - /etc/ceph/{{ cluster }}.conf - - /etc/ceph/{{ cluster }}.client.admin.keyring - failed_when: false - - - fail: - msg: "Ceph's configuration file is not present in /etc/ceph" - with_items: "{{ceph_conf_key.results}}" - when: - - item.stat.exists == false - - - name: exit playbook, if can not connect to the cluster - command: timeout 5 ceph --cluster {{ cluster }} health - register: ceph_health - until: ceph_health.stdout.find("HEALTH") > -1 - retries: 5 - delay: 2 - - - name: verify given monitors are reachable - command: ping -c 1 {{ item }} - with_items: "{{mon_host.split(',')}}" - register: mon_reachable - failed_when: false - - - fail: - msg: "One or more monitors are not reachable, please check your /etc/hosts or your DNS" - with_items: "{{mon_reachable.results}}" - when: - - item.rc != 0 - - - name: stop monitor service (systemd) - service: - name: ceph-mon@{{ item }} - state: stopped - enabled: no - with_items: "{{mon_host.split(',')}}" - delegate_to: "{{item}}" - failed_when: false - - - name: purge monitor store - file: - path: /var/lib/ceph/mon/{{ cluster }}-{{ item }} - state: absent - with_items: "{{mon_host.split(',')}}" - delegate_to: "{{item}}" - - - name: remove monitor from the quorum - command: ceph --cluster {{ cluster }} mon remove {{ item }} - failed_when: false - with_items: "{{mon_host.split(',')}}" - - # NOTE (leseb): sorry for the 'sleep' command - # but it will take a couple of seconds for other monitors - # to notice that one member has left. 
- # 'sleep 5' is not that bad and should be sufficient - - name: verify the monitor is out of the cluster - shell: "sleep 5 && ceph --cluster {{ cluster }} -s | grep monmap | sed 's/.*quorum//' | egrep -sq {{ item }}" - with_items: "{{mon_host.split(',')}}" - failed_when: false - register: ceph_health_mon - - - name: please remove the monitor from your ceph configuration file - debug: - msg: "The monitor(s) has/have been successfully removed from the cluster. - Please remove the monitor(s) entry(ies) from the rest of your ceph configuration files, cluster wide." - run_once: true - with_items: "{{ceph_health_mon.results}}" - when: - - item.rc != 0 - - - name: please remove the monitor from your ceph configuration file - fail: - msg: "Monitor(s) appear(s) to still be part of the cluster, please check what happened." - run_once: true - with_items: "{{ceph_health_mon.results}}" - when: - - item.rc == 0 diff --git a/shrink-osd.yml b/shrink-osd.yml deleted file mode 100644 index 5fb1bd60f..000000000 --- a/shrink-osd.yml +++ /dev/null @@ -1,131 +0,0 @@ ---- -# This playbook shrinks Ceph OSDs. -# It can remove any number of OSD(s) from the cluster and ALL THEIR DATA -# -# Use it like this: -# ansible-playbook shrink-osd.yml -e osd_ids=0,2,6 -# Prompts for confirmation to shrink, defaults to no and -# doesn't shrink the cluster. yes shrinks the cluster. -# -# ansible-playbook -e ireallymeanit=yes|no shrink-osd.yml -# Overrides the prompt using -e option. Can be used in -# automation scripts to avoid interactive prompt. - - -- name: confirm whether user really meant to remove osd(s) from the cluster - - hosts: - - localhost - - gather_facts: false - become: true - - vars_prompt: - - name: ireallymeanit - prompt: Are you sure you want to shrink the cluster?
- default: 'no' - private: no - - tasks: - - include_vars: roles/ceph-common/defaults/main.yml - - include_vars: group_vars/all - - - name: exit playbook, if user did not mean to shrink cluster - fail: - msg: "Exiting shrink-osd playbook, no osd(s) was/were removed.. - To shrink the cluster, either say 'yes' on the prompt or - or use `-e ireallymeanit=yes` on the command line when - invoking the playbook" - when: ireallymeanit != 'yes' - - - name: exit playbook, if no osd(s) was/were given - fail: - msg: "osd_ids must be declared - Exiting shrink-osd playbook, no OSD()s was/were removed. - On the command line when invoking the playbook, you can use - -e osd_ids=0,1,2,3 argument." - when: osd_ids is not defined - - - name: test if ceph command exist - command: command -v ceph - changed_when: false - failed_when: false - register: ceph_command - - - name: exit playbook, if ceph command does not exist - debug: - msg: "The ceph command is not available, please install it :(" - run_once: true - when: - - ceph_command.rc != 0 - - - name: exit playbook, if cluster files do not exist - stat: - path: "{{ item }}" - register: ceph_conf_key - with_items: - - /etc/ceph/{{ cluster }}.conf - - /etc/ceph/{{ cluster }}.client.admin.keyring - failed_when: false - - - fail: - msg: "Ceph's configuration file is not present in /etc/ceph" - with_items: "{{ceph_conf_key.results}}" - when: - - item.stat.exists == false - - - name: exit playbook, if can not connect to the cluster - command: timeout 5 ceph --cluster {{ cluster }} health - register: ceph_health - until: ceph_health.stdout.find("HEALTH") > -1 - retries: 5 - delay: 2 - -# NOTE (leseb): just in case, the complex filters mechanism below does not work anymore. -# This will be a quick and easy fix but will require using the shell module. 
-# - name: find the host where the osd(s) is/are running on -# shell: | -# ceph --cluster {{ cluster }} osd find {{ item }} | grep -Po '(?<="ip": ")[^:]*' -# with_items: "{{osd_ids.split(',')}}" -# register: osd_hosts -# - - name: find the host where the osd(s) is/are running on - command: ceph --cluster {{ cluster }} osd find {{ item }} - with_items: "{{osd_ids.split(',')}}" - register: osd_hosts - - - set_fact: ip_item="{{(item.stdout | from_json).ip}}" - with_items: "{{osd_hosts.results}}" - register: ip_result - - - set_fact: ips="{{ ip_result.results | map(attribute='ansible_facts.ip_item') | list }}" - - - set_fact: real_ips="{{ ips | regex_replace(':[0-9][0-9][0-9][0-9]\/[0-9][0-9][0-9][0-9]', '') }}" - - - name: check if ceph admin key exists on the osd nodes - stat: - path: "/etc/ceph/{{ cluster }}.client.admin.keyring" - register: ceph_admin_key - with_items: "{{real_ips}}" - delegate_to: "{{item}}" - failed_when: false - - - fail: - msg: "The Ceph admin key is not present on the OSD node, please add it and remove it after the playbook is done." - with_items: "{{ceph_admin_key.results}}" - when: - - item.stat.exists == false - - - name: deactivating osd(s) - command: ceph-disk deactivate --cluster {{ cluster }} --deactivate-by-id {{ item.0 }} --mark-out - with_together: - - "{{osd_ids.split(',')}}" - - "{{real_ips}}" - delegate_to: "{{item.1}}" - - - name: destroying osd(s) - command: ceph-disk destroy --cluster {{ cluster }} --destroy-by-id {{ item.0 }} --zap - with_together: - - "{{osd_ids.split(',')}}" - - "{{real_ips}}" - delegate_to: "{{item.1}}" diff --git a/take-over-existing-cluster.yml b/take-over-existing-cluster.yml deleted file mode 100644 index ce4eaa48f..000000000 --- a/take-over-existing-cluster.yml +++ /dev/null @@ -1,49 +0,0 @@ ---- -# NOTE (leseb): -# The playbook aims to takeover a cluster that was not configured with -# ceph-ansible. -# -# The procedure is as follows: -# -# 1. Install Ansible and add your monitors and osds hosts in it. 
For more detailed information you can read the [Ceph Ansible Wiki](https://github.com/ceph/ceph-ansible/wiki) -# 2. Set `generate_fsid: false` in `group_vars` -# 3. Get your current cluster fsid with `ceph fsid` and set `fsid` accordingly in `group_vars` -# 4. Run the playbook called: `take-over-existing-cluster.yml` like this `ansible-playbook take-over-existing-cluster.yml`. -# 5. Eventually run Ceph Ansible to validate everything by doing: `ansible-playbook site.yml`. - -- hosts: mons - become: True - vars_files: - - roles/ceph-common/defaults/main.yml - - group_vars/all - roles: - - ceph-fetch-keys - -- hosts: all - become: true - - tasks: - - include_vars: roles/ceph-common/defaults/main.yml - - include_vars: group_vars/all - - - name: get the name of the existing ceph cluster - shell: "ls /etc/ceph/*.conf" - changed_when: false - register: ceph_conf - - - name: stat ceph.conf - stat: - path: "{{ ceph_conf.stdout }}" - register: ceph_conf_stat - - - name: generate ceph configuration file - action: config_template - args: - src: "roles/ceph-common/templates/ceph.conf.j2" - dest: "{{ ceph_conf.stdout }}" - owner: "{{ ceph_conf_stat.stat.pw_name }}" - group: "{{ ceph_conf_stat.stat.gr_name }}" - mode: "{{ ceph_conf_stat.stat.mode }}" - config_overrides: "{{ ceph_conf_overrides }}" - config_type: ini -