git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ansible.git / commitdiff
Support containerized rolling update
author    Ivan Font <ifont@redhat.com>
          Sun, 6 Nov 2016 04:15:26 +0000 (21:15 -0700)
committer Ivan Font <ifont@redhat.com>
          Thu, 17 Nov 2016 19:25:25 +0000 (11:25 -0800)
- Update the rolling update playbook to support containerized deployments
  for mons, osds, mdss, and rgws
- Skip the check for a running existing cluster when performing a rolling
  update
- Fix a bug where starting the mds container failed because the admin
  keyring was missing. The keyring was missing because it was not yet
  available on the mon host when the copy_configs.yml task include file
  ran, so it was never pushed to the ansible host. We now explicitly wait
  for the admin keyring to be generated before continuing with the
  copy_configs.yml task include file (see the sketch after this list)
- Skip pre_requisite.yml when running on an Atomic host. As a result, it
  is no longer necessary to explicitly skip tasks carrying the with_pkg
  tag
- Add missing variables to all.docker.sample
- Misc. cleanup
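
For reference, the keyring fix reduces to a wait_for task placed ahead of
the copy_configs.yml include in roles/ceph-mon/tasks/docker/main.yml; a
minimal sketch of the pattern, matching the hunk in the diff below:

    # Block until the mon has generated the admin keyring; without this,
    # copy_configs.yml can run before the key exists and later daemons
    # (e.g. ceph-mds) never receive it.
    - name: wait for client.admin key exists
      wait_for:
        path: /etc/ceph/{{ cluster }}.client.admin.keyring
      when: cephx

    - include: copy_configs.yml
      when: not mon_containerized_deployment_with_kv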

Signed-off-by: Ivan Font <ifont@redhat.com>
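
With these changes a containerized cluster can be updated in place by
running the rolling update playbook as usual, for example (the inventory
file name here is hypothetical; the *_containerized_deployment flags are
assumed to already be set in group_vars, as for the initial deployment):

    ansible-playbook -i hosts infrastructure-playbooks/rolling_update.yml
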
group_vars/all.docker.sample
infrastructure-playbooks/rolling_update.yml
roles/ceph-mds/tasks/docker/main.yml
roles/ceph-mon/tasks/docker/main.yml
roles/ceph-mon/tasks/docker/start_docker_monitor.yml
roles/ceph-nfs/tasks/docker/main.yml
roles/ceph-osd/tasks/docker/main.yml
roles/ceph-rbd-mirror/tasks/docker/main.yml
roles/ceph-restapi/tasks/docker/main.yml
roles/ceph-rgw/tasks/docker/main.yml

diff --git a/group_vars/all.docker.sample b/group_vars/all.docker.sample
index c30b8f40340663952f774ec99935ad7e5b3f1349..d7e252ea9e1d888f97fdcb67b4d80116db1286f7 100644
@@ -39,6 +39,9 @@ dummy:
 #ceph_osd_docker_devices:
 # - /dev/sdb
 # - /dev/sdc
+#journal_size: 5120 # OSD journal size in MB
+#public_network: 0.0.0.0/0
+#cluster_network: "{{ public_network }}"
 
 #######
 # MDS #
diff --git a/infrastructure-playbooks/rolling_update.yml b/infrastructure-playbooks/rolling_update.yml
index 3a4b39f6e62fb4468208315622e112ce815cc7ee..9f8394c0812e8e77b0a90354aec4d035d907b722 100644
@@ -44,6 +44,9 @@
   become: True
   tasks:
     - debug: msg="gather facts on all Ceph hosts for following reference"
+
+    - set_fact: rolling_update=true
+
     - name: check if sysvinit
       stat:
         path: /etc/rc?.d/S??ceph
@@ -64,7 +67,6 @@
 
   vars:
     mon_group_name:       mons
-    restapi_group_name:   restapis
     health_mon_check_retries: 5
     health_mon_check_delay: 10
     upgrade_ceph_packages: True
   become: True
 
   pre_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-mon/defaults/main.yml
-    - include_vars: roles/ceph-restapi/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ mon_group_name }}
-      failed_when: false
-    - include_vars: group_vars/{{ restapi_group_name }}
-      failed_when: false
-
     - name: stop ceph mons with upstart
       service:
         name: ceph-mon
       when: is_systemd
 
   roles:
-    - ceph-common
     - ceph-mon
 
   post_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-mon/defaults/main.yml
-    - include_vars: roles/ceph-restapi/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ mon_group_name }}
-      failed_when: false
-    - include_vars: group_vars/{{ restapi_group_name }}
-      failed_when: false
-
     - name: start ceph mons with upstart
       service:
         name: ceph-mon
         enabled: yes
       when: is_systemd
 
-    - name: select a running monitor
+    - name: set mon_host_count
+      set_fact: mon_host_count={{ groups.mons | length }}
+
+    - name: select a running monitor if multiple monitors
       set_fact: mon_host={{ item }}
       with_items: "{{ groups.mons }}"
-      when: item != inventory_hostname
+      when:
+        - mon_host_count | int > 1
+        - item != inventory_hostname
+
+    - name: select first monitor if only one monitor
+      set_fact: mon_host={{ item }}
+      with_items: "{{ groups.mons[0] }}"
+      when:
+        - mon_host_count | int == 1
 
     - name: waiting for the monitor to join the quorum...
       shell: |
       retries: "{{ health_mon_check_retries }}"
       delay: "{{ health_mon_check_delay }}"
       delegate_to: "{{ mon_host }}"
+      when: not mon_containerized_deployment
+
+    - name: waiting for the containerized monitor to join the quorum...
+      shell: |
+        docker exec {{ hostvars[mon_host]['ansible_hostname'] }} ceph -s  --cluster {{ cluster }} | grep quorum | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }}
+      register: result
+      until: result.rc == 0
+      retries: "{{ health_mon_check_retries }}"
+      delay: "{{ health_mon_check_delay }}"
+      delegate_to: "{{ mon_host }}"
+      when: mon_containerized_deployment
 
 
 - name: upgrade ceph osds cluster
   become: True
 
   pre_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-osd/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ osd_group_name }}
-      failed_when: false
-
     - name: set osd flags
       command: ceph osd set {{ item }} --cluster {{ cluster }}
       with_items:
       shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | cut -d '-' -f 2 ; fi"
       register: osd_ids
       changed_when: false
+      when: not osd_containerized_deployment
 
-    - name: stop ceph osds (upstart)
+    - name: stop ceph osds with upstart
       service:
         name: ceph-osd-all
         state: stopped
       when: is_upstart.stat.exists == True
 
-    - name: stop ceph osds (sysvinit)
+    - name: stop ceph osds with sysvinit
       service:
         name: ceph
         state: stopped
       when: is_sysvinit.stat.exists == True
 
-    - name: stop ceph osds (systemd)
+    - name: stop ceph osds with systemd
       service:
         name: ceph-osd@{{item}}
         state: stopped
         enabled: yes
       with_items: "{{ osd_ids.stdout_lines }}"
-      when: is_systemd
+      when:
+        - is_systemd
+        - not osd_containerized_deployment
 
   roles:
-    - ceph-common
     - ceph-osd
 
   post_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-osd/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ osd_group_name }}
-      failed_when: false
-
     - name: get osd numbers
       shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | cut -d '-' -f 2 ; fi"
       register: osd_ids
       changed_when: false
+      when: not osd_containerized_deployment
 
-    - name: start ceph osds (upstart)
+    - name: start ceph osds with upstart
       service:
         name: ceph-osd-all
         state: started
       when: is_upstart.stat.exists == True
 
-    - name: start ceph osds (sysvinit)
+    - name: start ceph osds with sysvinit
       service:
         name: ceph
         state: started
       when: is_sysvinit.stat.exists == True
 
-    - name: start ceph osds (systemd)
+    - name: start ceph osds with systemd
       service:
         name: ceph-osd@{{item}}
         state: started
         enabled: yes
       with_items: "{{ osd_ids.stdout_lines }}"
-      when: is_systemd
+      when:
+        - is_systemd
+        - not osd_containerized_deployment
+
+    - name: restart containerized ceph osds with systemd
+      service:
+        name: ceph-osd@{{ item | basename }}
+        state: restarted
+        enabled: yes
+      with_items: ceph_osd_docker_devices
+      when:
+        - is_systemd
+        - osd_containerized_deployment
 
     - name: waiting for clean pgs...
       shell: |
       retries: "{{ health_osd_check_retries }}"
       delay: "{{ health_osd_check_delay }}"
       delegate_to: "{{ groups.mons[0] }}"
+      when: not osd_containerized_deployment
+
+    - name: container - waiting for clean pgs...
+      shell: |
+        test "$(docker exec {{ hostvars[groups.mons[0]]['ansible_hostname'] }} ceph pg stat --cluster {{ cluster }} | sed 's/^.*pgs://;s/active+clean.*//;s/ //')" -eq "$(docker exec {{ hostvars[groups.mons[0]]['ansible_hostname'] }} ceph pg stat --cluster {{ cluster }} | sed 's/pgs.*//;s/^.*://;s/ //')" && docker exec {{ hostvars[groups.mons[0]]['ansible_hostname'] }} ceph health --cluster {{ cluster }}  | egrep -sq "HEALTH_OK|HEALTH_WARN"
+      register: result
+      until: result.rc == 0
+      retries: "{{ health_osd_check_retries }}"
+      delay: "{{ health_osd_check_delay }}"
+      delegate_to: "{{ groups.mons[0] }}"
+      when: osd_containerized_deployment
 
     - name: unset osd flags
       command: ceph osd unset {{ item }} --cluster {{ cluster }}
         - noscrub
         - nodeep-scrub
       delegate_to: "{{ groups.mons[0] }}"
+      when: not osd_containerized_deployment
+
+    - name: unset containerized osd flags
+      command: |
+          docker exec {{ hostvars[groups.mons[0]]['ansible_hostname'] }} ceph osd unset {{ item }} --cluster {{ cluster }}
+      with_items:
+        - noout
+        - noscrub
+        - nodeep-scrub
+      delegate_to: "{{ groups.mons[0] }}"
+      when: osd_containerized_deployment
 
 
 - name: upgrade ceph mdss cluster
   become: True
 
   pre_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-mds/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ mds_group_name }}
-      failed_when: false
-
     - name: stop ceph mdss with upstart
       service:
         name: ceph-mds
       when: is_systemd
 
   roles:
-    - ceph-common
     - ceph-mds
 
   post_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-mds/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ mds_group_name }}
-      failed_when: false
-
     - name: start ceph mdss with upstart
       service:
         name: ceph-mds
   become: True
 
   pre_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-rgw/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ rgw_group_name }}
-      failed_when: false
-
-    - name: stop ceph rgws with systemd
+    - name: stop ceph rgws with upstart
       service:
-        name: ceph-radosgw@rgw.{{ ansible_hostname }}
+        name: ceph-radosgw
         state: stopped
-        enabled: yes
-      when: is_systemd
+      when: is_upstart.stat.exists == True
 
     - name: stop ceph rgws with sysvinit
       service:
         state: stopped
       when: is_sysvinit.stat.exists == True
 
-    - name: stop ceph rgws with upstart
+    - name: stop ceph rgws with systemd
       service:
-        name: ceph-radosgw
+        name: ceph-radosgw@rgw.{{ ansible_hostname }}
         state: stopped
-      when: is_upstart.stat.exists == True
+        enabled: yes
+      when:
+        - is_systemd
+        - not rgw_containerized_deployment
 
   roles:
-    - ceph-common
     - ceph-rgw
 
   post_tasks:
-    - include_vars: roles/ceph-common/defaults/main.yml
-    - include_vars: roles/ceph-rgw/defaults/main.yml
-    - include_vars: group_vars/all
-      failed_when: false
-    - include_vars: group_vars/{{ rgw_group_name }}
-      failed_when: false
-
-    - name: start ceph rgws with systemd
+    - name: start ceph rgws with upstart
       service:
-        name: ceph-radosgw@rgw.{{ ansible_hostname }}
+        name: ceph-radosgw
         state: started
-        enabled: yes
-      when: is_systemd
+      when: is_upstart.stat.exists == True
 
     - name: start ceph rgws with sysvinit
       service:
         state: started
       when: is_sysvinit.stat.exists == True
 
-    - name: start ceph rgws with upstart
+    - name: start ceph rgws with systemd
       service:
-        name: ceph-radosgw
+        name: ceph-radosgw@rgw.{{ ansible_hostname }}
         state: started
-      when: is_upstart.stat.exists == True
+        enabled: yes
+      when:
+        - is_systemd
+        - not rgw_containerized_deployment
+
+    - name: restart containerized ceph rgws with systemd
+      service:
+        name: ceph-rgw@{{ ansible_hostname }}
+        state: restarted
+        enabled: yes
+      when:
+        - is_systemd
+        - rgw_containerized_deployment
diff --git a/roles/ceph-mds/tasks/docker/main.yml b/roles/ceph-mds/tasks/docker/main.yml
index fbae68b89ceb872158a20deebe3cd4968f90052c..a021b823cb2949278b1a230f20ae84f6e22b6dba 100644
 
 - name: set fact for using Atomic host
   set_fact:
-      is_atomic='{{ stat_ostree.stat.exists }}'
+    is_atomic: '{{ stat_ostree.stat.exists }}'
 
 - include: checks.yml
-  when: ceph_health.rc != 0
+  when:
+    - ceph_health.rc != 0
+    - not "{{ rolling_update | default(false) }}"
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
diff --git a/roles/ceph-mon/tasks/docker/main.yml b/roles/ceph-mon/tasks/docker/main.yml
index 4ac1c9ea94d0bc757759ec322ff89f0dc9365bc5..2fc09aa8567b472cf7eba0dfc4276c6ae32eb44e 100644
   when:
     - ceph_health.rc != 0
     - not mon_containerized_deployment_with_kv
+    - not "{{ rolling_update | default(false) }}"
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
 
 - include: start_docker_monitor.yml
 
+# NOTE: if we don't wait we will attempt to copy config to ansible host
+# before admin key is ready, preventing future daemons e.g. ceph-mds from
+# properly retrieving key
+- name: wait for client.admin key exists
+  wait_for:
+    path: /etc/ceph/{{ cluster }}.client.admin.keyring
+  when: cephx
+
 - include: copy_configs.yml
   when: not mon_containerized_deployment_with_kv
 
diff --git a/roles/ceph-mon/tasks/docker/start_docker_monitor.yml b/roles/ceph-mon/tasks/docker/start_docker_monitor.yml
index c6c5e402a58a72c6998966b00dee7d0080014bcd..16147050fe1ab8602f9bf9ef2783fd071472e95b 100644
   changed_when: false
   when: ansible_os_family == 'RedHat' or ansible_os_family == 'CoreOS'
 
-- name: wait for ceph.conf exists
-  wait_for:
-      path: "/etc/ceph/{{ cluster }}.conf"
-  when: ansible_os_family == 'RedHat'
-
 - name: run the ceph monitor docker image
   docker:
     image: "{{ ceph_mon_docker_username }}/{{ ceph_mon_docker_imagename }}:{{ ceph_mon_docker_image_tag }}"
diff --git a/roles/ceph-nfs/tasks/docker/main.yml b/roles/ceph-nfs/tasks/docker/main.yml
index ca7732c60438a3e63879ef46eb1f3a4756d2d328..fc2024381067fab33b659a6c36c18becd53f9fe8 100644
@@ -19,6 +19,7 @@
     not mon_containerized_deployment_with_kv
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
diff --git a/roles/ceph-osd/tasks/docker/main.yml b/roles/ceph-osd/tasks/docker/main.yml
index 16ccd8cb2eb3ea01eb4f16c85c96c3201c3fbec0..b418fa132f72722d80ea168ef793f2ee55c8746d 100644
@@ -9,6 +9,7 @@
   when:
     - ceph_health.rc != 0
     - not osd_containerized_deployment_with_kv
+    - not "{{ rolling_update | default(false) }}"
 
 - name: check if it is Atomic host
   stat: path=/run/ostree-booted
 
 - name: set fact for using Atomic host
   set_fact:
-      is_atomic: '{{ stat_ostree.stat.exists }}'
+    is_atomic: '{{ stat_ostree.stat.exists }}'
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
diff --git a/roles/ceph-rbd-mirror/tasks/docker/main.yml b/roles/ceph-rbd-mirror/tasks/docker/main.yml
index 7bfe9da2c364faa7e42e5ba9414f61bcc05ab2dd..87aace0be4278797408810b418aec5c0ff499a39 100644
@@ -17,6 +17,7 @@
   when: ceph_health.rc != 0
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
diff --git a/roles/ceph-restapi/tasks/docker/main.yml b/roles/ceph-restapi/tasks/docker/main.yml
index fc22747941397b95e8aa8668ebabbbea0d011b76..31b44ce9babd8ce126f12727eb5493f3a9a7e81f 100644
@@ -8,6 +8,7 @@
       is_atomic: '{{ stat_ostree.stat.exists }}'
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when:
diff --git a/roles/ceph-rgw/tasks/docker/main.yml b/roles/ceph-rgw/tasks/docker/main.yml
index 63579ed8a8c061781a47cc8b36a625770fad2bb0..96de18281434fbdc1b4af973f4c48f442dd5b5ee 100644
 
 - name: set fact for using Atomic host
   set_fact:
-      is_atomic='{{ stat_ostree.stat.exists }}'
+    is_atomic: '{{ stat_ostree.stat.exists }}'
 
 - include: checks.yml
-  when: ceph_health.rc != 0
+  when:
+    - ceph_health.rc != 0
+    - not "{{ rolling_update | default(false) }}"
 
 - include: pre_requisite.yml
+  when: not is_atomic
 
 - include: "{{ playbook_dir }}/roles/ceph-common/tasks/misc/ntp_atomic.yml"
   when: