From: Guillaume Abrioux Date: Wed, 20 Oct 2021 08:01:05 +0000 (+0200) Subject: update: support upgrading a subset of nodes X-Git-Tag: v4.0.67~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=dc1a4c29ea2a1ce77aa56223ed39fb470ef2276f;p=ceph-ansible.git update: support upgrading a subset of nodes It can be useful in a large cluster deployment to split the upgrade and only upgrade a group of nodes at a time. Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2014304 Signed-off-by: Guillaume Abrioux (cherry picked from commit e5cf9db2b04f55196d867f5a7248b455307f4407) --- diff --git a/infrastructure-playbooks/rolling_update.yml b/infrastructure-playbooks/rolling_update.yml index 7bd626a7f..23fe397f2 100644 --- a/infrastructure-playbooks/rolling_update.yml +++ b/infrastructure-playbooks/rolling_update.yml @@ -16,6 +16,7 @@ - name: confirm whether user really meant to upgrade the cluster hosts: localhost + tags: always become: false gather_facts: false vars: @@ -39,7 +40,6 @@ - name: gather facts and check the init system - hosts: - "{{ mon_group_name|default('mons') }}" - "{{ osd_group_name|default('osds') }}" @@ -51,7 +51,7 @@ - "{{ client_group_name|default('clients') }}" - "{{ iscsi_gw_group_name|default('iscsigws') }}" - "{{ grafana_server_group_name|default('grafana-server') }}" - + tags: always any_errors_fatal: True become: True gather_facts: False @@ -128,6 +128,7 @@ rolling_update: true - name: upgrade ceph mon cluster + tags: mons vars: health_mon_check_retries: 5 health_mon_check_delay: 15 @@ -315,6 +316,7 @@ - name: reset mon_host hosts: "{{ mon_group_name|default('mons') }}" + tags: always become: True gather_facts: false tasks: @@ -332,6 +334,7 @@ health_mon_check_delay: 15 upgrade_ceph_packages: True hosts: "{{ mon_group_name|default('mons') }}" + tags: mgrs serial: 1 become: True gather_facts: false @@ -367,6 +370,7 @@ upgrade_ceph_packages: True ceph_release: "{{ ceph_stable_release }}" hosts: "{{ mgr_group_name|default('mgrs') }}" + tags: mgrs serial: 1 become: True gather_facts: false @@ -402,6 +406,7 @@ - name: set osd flags hosts: "{{ mon_group_name | default('mons') }}[0]" + tags: osds become: True gather_facts: false tasks: @@ -457,8 +462,8 @@ health_osd_check_retries: 40 health_osd_check_delay: 30 upgrade_ceph_packages: True - hosts: "{{ osd_group_name|default('osds') }}" + tags: osds serial: 1 become: True gather_facts: false @@ -539,6 +544,7 @@ - name: complete osd upgrade hosts: "{{ mon_group_name|default('mons') }}[0]" + tags: osds become: True gather_facts: false tasks: @@ -597,6 +603,7 @@ - name: upgrade ceph mdss cluster, deactivate all rank > 0 hosts: "{{ mon_group_name | default('mons') }}[0]" + tags: mdss become: true gather_facts: false tasks: @@ -686,6 +693,7 @@ vars: upgrade_ceph_packages: True hosts: active_mdss + tags: mdss become: true gather_facts: false tasks: @@ -732,6 +740,7 @@ vars: upgrade_ceph_packages: True hosts: standby_mdss + tags: mdss become: True gather_facts: false @@ -773,6 +782,7 @@ vars: upgrade_ceph_packages: True hosts: "{{ rgw_group_name|default('rgws') }}" + tags: rgws serial: 1 become: True gather_facts: false @@ -817,6 +827,7 @@ vars: upgrade_ceph_packages: True hosts: "{{ rbdmirror_group_name|default('rbdmirrors') }}" + tags: rbdmirrors serial: 1 become: True gather_facts: false @@ -850,6 +861,7 @@ vars: upgrade_ceph_packages: True hosts: "{{ nfs_group_name|default('nfss') }}" + tags: nfss serial: 1 become: True gather_facts: false @@ -898,8 +910,8 @@ - name: upgrade ceph iscsi gateway node vars: upgrade_ceph_packages: True - hosts: - - "{{ iscsi_gw_group_name|default('iscsigws') }}" + hosts: "{{ iscsi_gw_group_name|default('iscsigws') }}" + tags: iscsigws serial: 1 become: True gather_facts: false @@ -941,6 +953,7 @@ vars: upgrade_ceph_packages: True hosts: "{{ client_group_name|default('clients') }}" + tags: clients serial: "{{ client_update_batch | default(20) }}" become: True gather_facts: false @@ -972,6 +985,9 @@ - "{{ rgw_group_name | default('rgws') }}" - "{{ rbdmirror_group_name | default('rbdmirrors') }}" - "{{ mgr_group_name | default('mgrs') }}" + tags: + - post_upgrade + - crash gather_facts: false become: true tasks: @@ -1005,15 +1021,16 @@ - name: complete upgrade hosts: - - "{{ mon_group_name | default('mons') }}" - - "{{ mgr_group_name | default('mgrs') }}" - - "{{ osd_group_name | default('osds') }}" - - "{{ mds_group_name | default('mdss') }}" - - "{{ rgw_group_name | default('rgws') }}" - - "{{ nfs_group_name | default('nfss') }}" - - "{{ rbdmirror_group_name | default('rbdmirrors') }}" - - "{{ client_group_name | default('clients') }}" - - "{{ iscsi_gw_group_name | default('iscsigws') }}" + - "{{ mon_group_name | default('mons') }}" + - "{{ mgr_group_name | default('mgrs') }}" + - "{{ osd_group_name | default('osds') }}" + - "{{ mds_group_name | default('mdss') }}" + - "{{ rgw_group_name | default('rgws') }}" + - "{{ nfs_group_name | default('nfss') }}" + - "{{ rbdmirror_group_name | default('rbdmirrors') }}" + - "{{ client_group_name | default('clients') }}" + - "{{ iscsi_gw_group_name | default('iscsigws') }}" + tags: post_upgrade become: True gather_facts: false tasks: @@ -1073,6 +1090,7 @@ - "{{ nfs_group_name|default('nfss') }}" - "{{ iscsi_gw_group_name|default('iscsigws') }}" - "{{ grafana_server_group_name|default('grafana-server') }}" + tags: monitoring gather_facts: false become: true tasks: @@ -1103,6 +1121,7 @@ - name: upgrade monitoring node hosts: "{{ grafana_server_group_name }}" + tags: monitoring gather_facts: false become: true tasks: @@ -1134,6 +1153,7 @@ - name: upgrade ceph dashboard hosts: "{{ groups[mgr_group_name] | default(groups[mon_group_name]) | default(omit) }}" + tags: monitoring gather_facts: false become: true tasks: @@ -1153,6 +1173,7 @@ - name: switch any existing crush buckets to straw2 hosts: "{{ mon_group_name | default('mons') }}[0]" + tags: post_upgrade become: true any_errors_fatal: true gather_facts: false @@ -1194,6 +1215,7 @@ - name: show ceph status hosts: "{{ mon_group_name|default('mons') }}" + tags: always become: True gather_facts: false tasks: diff --git a/tox-subset_update.ini b/tox-subset_update.ini new file mode 100644 index 000000000..95bc3cb4a --- /dev/null +++ b/tox-subset_update.ini @@ -0,0 +1,111 @@ +[tox] +envlist = centos-{container,non_container}-subset_update + +skipsdist = True + +[testenv] +whitelist_externals = + vagrant + bash + git + pip +passenv=* +setenv= + ANSIBLE_SSH_ARGS = -F {changedir}/vagrant_ssh_config -o ControlMaster=auto -o ControlPersist=600s -o PreferredAuthentications=publickey + ANSIBLE_CONFIG = {toxinidir}/ansible.cfg + ANSIBLE_CALLBACK_WHITELIST = profile_tasks + ANSIBLE_CACHE_PLUGIN = memory + ANSIBLE_GATHERING = implicit + # only available for ansible >= 2.5 + ANSIBLE_STDOUT_CALLBACK = yaml +# non_container: DEV_SETUP = True + # Set the vagrant box image to use + centos-non_container: CEPH_ANSIBLE_VAGRANT_BOX = centos/8 + centos-container: CEPH_ANSIBLE_VAGRANT_BOX = centos/8 + + INVENTORY = {env:_INVENTORY:hosts} + container: CONTAINER_DIR = /container + container: PLAYBOOK = site-container.yml.sample + non_container: PLAYBOOK = site.yml.sample + + UPDATE_CEPH_DOCKER_IMAGE_TAG = latest-master + UPDATE_CEPH_DEV_BRANCH = master + UPDATE_CEPH_DEV_SHA1 = latest + ROLLING_UPDATE = True +deps= -r{toxinidir}/tests/requirements.txt +changedir={toxinidir}/tests/functional/subset_update{env:CONTAINER_DIR:} +commands= + bash {toxinidir}/tests/scripts/vagrant_up.sh --no-provision {posargs:--provider=virtualbox} + bash {toxinidir}/tests/scripts/generate_ssh_config.sh {changedir} + + ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/tests/functional/setup.yml + + non_container: ansible-playbook -vv -i "localhost," -c local {toxinidir}/tests/functional/dev_setup.yml --extra-vars "dev_setup=True change_dir={changedir} ceph_dev_branch={env:UPDATE_CEPH_DEV_BRANCH:master} ceph_dev_sha1={env:UPDATE_CEPH_DEV_SHA1:latest}" --tags "vagrant_setup" + ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/{env:PLAYBOOK:site.yml.sample} --extra-vars "\ + delegate_facts_host={env:DELEGATE_FACTS_HOST:True} \ + ceph_dev_branch={env:UPDATE_CEPH_DEV_BRANCH:master} \ + ceph_dev_sha1={env:UPDATE_CEPH_DEV_SHA1:latest} \ + ceph_docker_registry_auth=True \ + ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \ + ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \ + " + +# upgrade mons + ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --tags=mons --extra-vars "\ + ireallymeanit=yes \ + ceph_dev_branch={env:UPDATE_CEPH_DEV_BRANCH:master} \ + ceph_dev_sha1={env:UPDATE_CEPH_DEV_SHA1:latest} \ + ceph_docker_registry_auth=True \ + ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \ + ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \ + " +# upgrade mgrs + ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --tags=mgrs --extra-vars "\ + ireallymeanit=yes \ + ceph_dev_branch={env:UPDATE_CEPH_DEV_BRANCH:master} \ + ceph_dev_sha1={env:UPDATE_CEPH_DEV_SHA1:latest} \ + ceph_docker_registry_auth=True \ + ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \ + ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \ + " +# upgrade osd1 + ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --limit=osd1 --tags=osds --extra-vars "\ + ireallymeanit=yes \ + ceph_dev_branch={env:UPDATE_CEPH_DEV_BRANCH:master} \ + ceph_dev_sha1={env:UPDATE_CEPH_DEV_SHA1:latest} \ + ceph_docker_registry_auth=True \ + ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \ + ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \ + " +# upgrade remaining osds (serially) + ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --limit='osds:!osd1' --tags=osds --extra-vars "\ + ireallymeanit=yes \ + ceph_dev_branch={env:UPDATE_CEPH_DEV_BRANCH:master} \ + ceph_dev_sha1={env:UPDATE_CEPH_DEV_SHA1:latest} \ + ceph_docker_registry_auth=True \ + ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \ + ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \ + " +# upgrade rgws + ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --tags=rgws --extra-vars "\ + ireallymeanit=yes \ + ceph_dev_branch={env:UPDATE_CEPH_DEV_BRANCH:master} \ + ceph_dev_sha1={env:UPDATE_CEPH_DEV_SHA1:latest} \ + ceph_docker_registry_auth=True \ + ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \ + ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \ + " +# post upgrade actions + ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --tags=post_upgrade --extra-vars "\ + ireallymeanit=yes \ + ceph_dev_branch={env:UPDATE_CEPH_DEV_BRANCH:master} \ + ceph_dev_sha1={env:UPDATE_CEPH_DEV_SHA1:latest} \ + ceph_docker_registry_auth=True \ + ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \ + ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \ + " + + +# bash -c "CEPH_STABLE_RELEASE=quincy py.test --reruns 5 --reruns-delay 1 -n 8 --durations=0 --sudo -v --connection=ansible --ansible-inventory={changedir}/{env:INVENTORY} --ssh-config={changedir}/vagrant_ssh_config {toxinidir}/tests/functional/tests" + +# vagrant destroy --force