From 3ff17f1c8fa1e2193a003808cad9ba444aba2d42 Mon Sep 17 00:00:00 2001
From: Ivan Font
Date: Sat, 5 Nov 2016 21:15:26 -0700
Subject: [PATCH] Support containerized rolling update

- Update rolling update playbook to support containerized deployments
  for mons, osds, mdss, and rgws
- Skip checking if existing cluster is running when performing a rolling
  update
- Fixed bug where we were failing to start the mds container because it
  was missing the admin keyring. The admin keyring was missing because
  it was not being pushed from the mon host to the ansible host due to
  the keyring not being available before running the copy_configs.yml
  task include file. Now we forcefully wait for the admin keyring to be
  generated before continuing with the copy_configs.yml task include
  file
- Skip pre_requisite.yml when running on atomic host. This technically
  no longer requires specifying to skip tasks containing the with_pkg
  tag
- Add missing variables to all.docker.sample
- Misc. cleanup

Signed-off-by: Ivan Font
---
Review notes (free-form area, ignored by git am):

* The rolling_update guards in the ceph-mds, ceph-mon, ceph-osd and
  ceph-rgw docker/main.yml files are written as the bare expression
  `not (rolling_update | default(false) | bool)`. The quoted form
  `not "{{ rolling_update | default(false) }}"` renders to a non-empty
  string ("True" or "False"), which is always truthy, so the negated
  condition was always false and checks.yml was never included.
* The containerized OSD restart task loops over
  "{{ ceph_osd_docker_devices }}" instead of the bare variable name,
  which is deprecated and is otherwise taken as a literal string.
* Both changes replace added lines one-for-one, so hunk line counts and
  the diffstat below are unchanged. The `index` blob hashes were not
  regenerated.

 group_vars/all.docker.sample                  |   3 +
 infrastructure-playbooks/rolling_update.yml   | 191 +++++++++---------
 roles/ceph-mds/tasks/docker/main.yml          |   7 +-
 roles/ceph-mon/tasks/docker/main.yml          |  10 +
 .../tasks/docker/start_docker_monitor.yml     |   5 -
 roles/ceph-nfs/tasks/docker/main.yml          |   1 +
 roles/ceph-osd/tasks/docker/main.yml          |   4 +-
 roles/ceph-rbd-mirror/tasks/docker/main.yml   |   1 +
 roles/ceph-restapi/tasks/docker/main.yml      |   1 +
 roles/ceph-rgw/tasks/docker/main.yml          |   7 +-
 10 files changed, 129 insertions(+), 101 deletions(-)

diff --git a/group_vars/all.docker.sample b/group_vars/all.docker.sample
index c30b8f403..d7e252ea9 100644
--- a/group_vars/all.docker.sample
+++ b/group_vars/all.docker.sample
@@ -39,6 +39,9 @@ dummy:
 #ceph_osd_docker_devices:
 # - /dev/sdb
 # - /dev/sdc
+#journal_size: 5120 # OSD journal size in MB
+#public_network: 0.0.0.0/0
+#cluster_network: "{{ public_network }}"
 
 #######
 # MDS #
diff --git a/infrastructure-playbooks/rolling_update.yml b/infrastructure-playbooks/rolling_update.yml
index 3a4b39f6e..9f8394c08 100644
--- a/infrastructure-playbooks/rolling_update.yml
+++ b/infrastructure-playbooks/rolling_update.yml
@@ -44,6 +44,9 @@
   become: True
   tasks:
     - debug: msg="gather facts on all Ceph hosts for following reference"
+
+    - set_fact: rolling_update=true
+
     - name: check if sysvinit
       stat:
         path: /etc/rc?.d/S??ceph
@@ -64,7 +67,6 @@
 
   vars:
     mon_group_name: mons
-    restapi_group_name: restapis
     health_mon_check_retries: 5
     health_mon_check_delay: 10
     upgrade_ceph_packages: True
@@ -76,16 +78,6 @@
   become: True
 
   pre_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-mon/defaults/main.yml
-    - include_vars: roles/ceph-restapi/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ mon_group_name }}
-      failed_when: false
-    - include_vars: group_vars/{{ restapi_group_name }}
-      failed_when: false
-
     - name: stop ceph mons with upstart
       service:
         name: ceph-mon
@@ -107,20 +99,9 @@
       when: is_systemd
 
   roles:
-    - ceph-common
     - ceph-mon
 
   post_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-mon/defaults/main.yml
-    - include_vars: roles/ceph-restapi/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ mon_group_name }}
-      failed_when: false
-    - include_vars: group_vars/{{ restapi_group_name }}
-      failed_when: false
-
     - name: start ceph mons with upstart
       service:
         name: ceph-mon
@@ -141,10 +122,21 @@
         enabled: yes
       when: is_systemd
 
-    - name: select a running monitor
+    - name: set mon_host_count
+      set_fact: mon_host_count={{ groups.mons | length }}
+
+    - name: select a running monitor if multiple monitors
       set_fact: mon_host={{ item }}
       with_items: "{{ groups.mons }}"
-      when: item != inventory_hostname
+      when:
+        - mon_host_count | int > 1
+        - item != inventory_hostname
+
+    - name: select first monitor if only one monitor
+      set_fact: mon_host={{ item }}
+      with_items: "{{ groups.mons[0] }}"
+      when:
+        - mon_host_count | int == 1
 
     - name: waiting for the monitor to join the quorum...
       shell: |
@@ -154,6 +146,17 @@
       retries: "{{ health_mon_check_retries }}"
       delay: "{{ health_mon_check_delay }}"
       delegate_to: "{{ mon_host }}"
+      when: not mon_containerized_deployment
+
+    - name: waiting for the containerized monitor to join the quorum...
+      shell: |
+        docker exec {{ hostvars[mon_host]['ansible_hostname'] }} ceph -s --cluster {{ cluster }} | grep quorum | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }}
+      register: result
+      until: result.rc == 0
+      retries: "{{ health_mon_check_retries }}"
+      delay: "{{ health_mon_check_delay }}"
+      delegate_to: "{{ mon_host }}"
+      when: mon_containerized_deployment
 
 
 - name: upgrade ceph osds cluster
@@ -171,13 +174,6 @@
   become: True
 
   pre_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-osd/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ osd_group_name }}
-      failed_when: false
-
     - name: set osd flags
       command: ceph osd set {{ item }} --cluster {{ cluster }}
       with_items:
@@ -190,63 +186,71 @@
       shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | cut -d '-' -f 2 ; fi"
       register: osd_ids
       changed_when: false
+      when: not osd_containerized_deployment
 
-    - name: stop ceph osds (upstart)
+    - name: stop ceph osds with upstart
       service:
         name: ceph-osd-all
         state: stopped
       when: is_upstart.stat.exists == True
 
-    - name: stop ceph osds (sysvinit)
+    - name: stop ceph osds with sysvinit
       service:
         name: ceph
         state: stopped
       when: is_sysvinit.stat.exists == True
 
-    - name: stop ceph osds (systemd)
+    - name: stop ceph osds with systemd
       service:
         name: ceph-osd@{{item}}
         state: stopped
         enabled: yes
       with_items: "{{ osd_ids.stdout_lines }}"
-      when: is_systemd
+      when:
+        - is_systemd
+        - not osd_containerized_deployment
 
   roles:
-    - ceph-common
     - ceph-osd
 
   post_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-osd/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ osd_group_name }}
-      failed_when: false
-
     - name: get osd numbers
       shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | cut -d '-' -f 2 ; fi"
       register: osd_ids
       changed_when: false
+      when: not osd_containerized_deployment
 
-    - name: start ceph osds (upstart)
+    - name: start ceph osds with upstart
       service:
         name: ceph-osd-all
         state: started
       when: is_upstart.stat.exists == True
 
-    - name: start ceph osds (sysvinit)
+    - name: start ceph osds with sysvinit
       service:
         name: ceph
         state: started
       when: is_sysvinit.stat.exists == True
 
-    - name: start ceph osds (systemd)
+    - name: start ceph osds with systemd
       service:
         name: ceph-osd@{{item}}
         state: started
         enabled: yes
       with_items: "{{ osd_ids.stdout_lines }}"
-      when: is_systemd
+      when:
+        - is_systemd
+        - not osd_containerized_deployment
+
+    - name: restart containerized ceph osds with systemd
+      service:
+        name: ceph-osd@{{ item | basename }}
+        state: restarted
+        enabled: yes
+      with_items: "{{ ceph_osd_docker_devices }}"
+      when:
+        - is_systemd
+        - osd_containerized_deployment
 
     - name: waiting for clean pgs...
       shell: |
@@ -256,6 +260,17 @@
       retries: "{{ health_osd_check_retries }}"
       delay: "{{ health_osd_check_delay }}"
       delegate_to: "{{ groups.mons[0] }}"
+      when: not osd_containerized_deployment
+
+    - name: container - waiting for clean pgs...
+      shell: |
+        test "$(docker exec {{ hostvars[groups.mons[0]]['ansible_hostname'] }} ceph pg stat --cluster {{ cluster }} | sed 's/^.*pgs://;s/active+clean.*//;s/ //')" -eq "$(docker exec {{ hostvars[groups.mons[0]]['ansible_hostname'] }} ceph pg stat --cluster {{ cluster }} | sed 's/pgs.*//;s/^.*://;s/ //')" && docker exec {{ hostvars[groups.mons[0]]['ansible_hostname'] }} ceph health --cluster {{ cluster }} | egrep -sq "HEALTH_OK|HEALTH_WARN"
+      register: result
+      until: result.rc == 0
+      retries: "{{ health_osd_check_retries }}"
+      delay: "{{ health_osd_check_delay }}"
+      delegate_to: "{{ groups.mons[0] }}"
+      when: osd_containerized_deployment
 
     - name: unset osd flags
       command: ceph osd unset {{ item }} --cluster {{ cluster }}
@@ -264,6 +279,17 @@
         - noscrub
         - nodeep-scrub
       delegate_to: "{{ groups.mons[0] }}"
+      when: not osd_containerized_deployment
+
+    - name: unset containerized osd flags
+      command: |
+        docker exec {{ hostvars[groups.mons[0]]['ansible_hostname'] }} ceph osd unset {{ item }} --cluster {{ cluster }}
+      with_items:
+        - noout
+        - noscrub
+        - nodeep-scrub
+      delegate_to: "{{ groups.mons[0] }}"
+      when: osd_containerized_deployment
 
 
 - name: upgrade ceph mdss cluster
@@ -279,13 +305,6 @@
   become: True
 
   pre_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-mds/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ mds_group_name }}
-      failed_when: false
-
     - name: stop ceph mdss with upstart
       service:
         name: ceph-mds
@@ -308,17 +327,9 @@
       when: is_systemd
 
   roles:
-    - ceph-common
     - ceph-mds
 
   post_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-mds/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ mds_group_name }}
-      failed_when: false
-
     - name: start ceph mdss with upstart
       service:
         name: ceph-mds
@@ -354,19 +365,11 @@
   become: True
 
   pre_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-rgw/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ rgw_group_name }}
-      failed_when: false
-
-    - name: stop ceph rgws with systemd
+    - name: stop ceph rgws with upstart
       service:
-        name: ceph-radosgw@rgw.{{ ansible_hostname }}
+        name: ceph-radosgw
         state: stopped
-        enabled: yes
-      when: is_systemd
+      when: is_upstart.stat.exists == True
 
     - name: stop ceph rgws with sysvinit
       service:
@@ -374,30 +377,24 @@
         state: stopped
       when: is_sysvinit.stat.exists == True
 
-    - name: stop ceph rgws with upstart
+    - name: stop ceph rgws with systemd
       service:
-        name: ceph-radosgw
+        name: ceph-radosgw@rgw.{{ ansible_hostname }}
         state: stopped
-      when: is_upstart.stat.exists == True
+        enabled: yes
+      when:
+        - is_systemd
+        - not rgw_containerized_deployment
 
   roles:
-    - ceph-common
     - ceph-rgw
 
   post_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-rgw/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ rgw_group_name }}
-      failed_when: false
-
-    - name: start ceph rgws with systemd
+    - name: start ceph rgws with upstart
       service:
-        name: ceph-radosgw@rgw.{{ ansible_hostname }}
+        name: ceph-radosgw
         state: started
-        enabled: yes
-      when: is_systemd
+      when: is_upstart.stat.exists == True
 
     - name: start ceph rgws with sysvinit
       service:
@@ -405,8 +402,20 @@
         state: started
       when: is_sysvinit.stat.exists == True
 
-    - name: start ceph rgws with upstart
+    - name: start ceph rgws with systemd
       service:
-        name: ceph-radosgw
+        name: ceph-radosgw@rgw.{{ ansible_hostname }}
         state: started
-      when: is_upstart.stat.exists == True
+        enabled: yes
+      when:
+        - is_systemd
+        - not rgw_containerized_deployment
+
+    - name: restart containerized ceph rgws with systemd
+      service:
+        name: ceph-rgw@{{ ansible_hostname }}
+        state: restarted
+        enabled: yes
+      when:
+        - is_systemd
+        - rgw_containerized_deployment
diff --git a/roles/ceph-mds/tasks/docker/main.yml b/roles/ceph-mds/tasks/docker/main.yml
index fbae68b89..a021b823c 100644
--- a/roles/ceph-mds/tasks/docker/main.yml
+++ b/roles/ceph-mds/tasks/docker/main.yml
@@ -11,12 +11,15 @@
 
 - name: set fact for using Atomic host
   set_fact:
-      is_atomic='{{ stat_ostree.stat.exists }}'
+    is_atomic: '{{ stat_ostree.stat.exists }}'
 
 - include: checks.yml
-  when: ceph_health.rc != 0
+  when:
+    - ceph_health.rc != 0
+    - not (rolling_update | default(false) | bool)
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
diff --git a/roles/ceph-mon/tasks/docker/main.yml b/roles/ceph-mon/tasks/docker/main.yml
index 4ac1c9ea9..2fc09aa85 100644
--- a/roles/ceph-mon/tasks/docker/main.yml
+++ b/roles/ceph-mon/tasks/docker/main.yml
@@ -17,8 +17,10 @@
   when:
     - ceph_health.rc != 0
     - not mon_containerized_deployment_with_kv
+    - not (rolling_update | default(false) | bool)
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
@@ -58,6 +60,14 @@
 
 - include: start_docker_monitor.yml
 
+# NOTE: if we don't wait we will attempt to copy config to ansible host
+# before admin key is ready, preventing future daemons e.g. ceph-mds from
+# properly retrieving key
+- name: wait for client.admin key exists
+  wait_for:
+    path: /etc/ceph/{{ cluster }}.client.admin.keyring
+  when: cephx
+
 - include: copy_configs.yml
   when: not mon_containerized_deployment_with_kv
 
diff --git a/roles/ceph-mon/tasks/docker/start_docker_monitor.yml b/roles/ceph-mon/tasks/docker/start_docker_monitor.yml
index c6c5e402a..16147050f 100644
--- a/roles/ceph-mon/tasks/docker/start_docker_monitor.yml
+++ b/roles/ceph-mon/tasks/docker/start_docker_monitor.yml
@@ -72,11 +72,6 @@
   changed_when: false
   when: ansible_os_family == 'RedHat' or ansible_os_family == 'CoreOS'
 
-- name: wait for ceph.conf exists
-  wait_for:
-    path: "/etc/ceph/{{ cluster }}.conf"
-  when: ansible_os_family == 'RedHat'
-
 - name: run the ceph monitor docker image
   docker:
     image: "{{ ceph_mon_docker_username }}/{{ ceph_mon_docker_imagename }}:{{ ceph_mon_docker_image_tag }}"
diff --git a/roles/ceph-nfs/tasks/docker/main.yml b/roles/ceph-nfs/tasks/docker/main.yml
index ca7732c60..fc2024381 100644
--- a/roles/ceph-nfs/tasks/docker/main.yml
+++ b/roles/ceph-nfs/tasks/docker/main.yml
@@ -19,6 +19,7 @@
     not mon_containerized_deployment_with_kv
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
diff --git a/roles/ceph-osd/tasks/docker/main.yml b/roles/ceph-osd/tasks/docker/main.yml
index 16ccd8cb2..b418fa132 100644
--- a/roles/ceph-osd/tasks/docker/main.yml
+++ b/roles/ceph-osd/tasks/docker/main.yml
@@ -9,6 +9,7 @@
   when:
     - ceph_health.rc != 0
     - not osd_containerized_deployment_with_kv
+    - not (rolling_update | default(false) | bool)
 
 - name: check if it is Atomic host
   stat: path=/run/ostree-booted
@@ -16,9 +17,10 @@
 
 - name: set fact for using Atomic host
   set_fact:
-      is_atomic: '{{ stat_ostree.stat.exists }}'
+    is_atomic: '{{ stat_ostree.stat.exists }}'
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
diff --git a/roles/ceph-rbd-mirror/tasks/docker/main.yml b/roles/ceph-rbd-mirror/tasks/docker/main.yml
index 7bfe9da2c..87aace0be 100644
--- a/roles/ceph-rbd-mirror/tasks/docker/main.yml
+++ b/roles/ceph-rbd-mirror/tasks/docker/main.yml
@@ -17,6 +17,7 @@
   when: ceph_health.rc != 0
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
diff --git a/roles/ceph-restapi/tasks/docker/main.yml b/roles/ceph-restapi/tasks/docker/main.yml
index fc2274794..31b44ce9b 100644
--- a/roles/ceph-restapi/tasks/docker/main.yml
+++ b/roles/ceph-restapi/tasks/docker/main.yml
@@ -8,6 +8,7 @@
     is_atomic: '{{ stat_ostree.stat.exists }}'
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
diff --git a/roles/ceph-rgw/tasks/docker/main.yml b/roles/ceph-rgw/tasks/docker/main.yml
index 63579ed8a..96de18281 100644
--- a/roles/ceph-rgw/tasks/docker/main.yml
+++ b/roles/ceph-rgw/tasks/docker/main.yml
@@ -11,12 +11,15 @@
 
 - name: set fact for using Atomic host
   set_fact:
-      is_atomic='{{ stat_ostree.stat.exists }}'
+    is_atomic: '{{ stat_ostree.stat.exists }}'
 
 - include: checks.yml
-  when: ceph_health.rc != 0
+  when:
+    - ceph_health.rc != 0
+    - not (rolling_update | default(false) | bool)
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
-- 
2.39.5