From c7e269fcf5620a49909b880f57f5cbb988c27b07 Mon Sep 17 00:00:00 2001
From: Subhachandra Chandra
Date: Fri, 16 Mar 2018 10:10:14 -0700
Subject: [PATCH] Fix restarting OSDs twice during a rolling update.

During a rolling update, OSDs are currently restarted twice: once by
the handler in roles/ceph-defaults/handlers/main.yml, and a second time
by tasks in the rolling_update playbook. This change turns off restarts
by the handler. The restart initiated by the rolling_update playbook is
also more efficient, as it restarts all the OSDs on a host as one
operation and waits for them to rejoin the cluster, whereas the
handler's restart task restarts one OSD at a time and waits for it to
join the cluster.
---
 roles/ceph-defaults/handlers/main.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/roles/ceph-defaults/handlers/main.yml b/roles/ceph-defaults/handlers/main.yml
index 65b227a37..11e1a16de 100644
--- a/roles/ceph-defaults/handlers/main.yml
+++ b/roles/ceph-defaults/handlers/main.yml
@@ -64,6 +64,9 @@
 # This does not just restart OSDs but everything else too. Unfortunately
 # at this time the ansible role does not have an OSD id list to use
 # for restarting them specifically.
+# This does not need to run during a rolling update as the playbook will
+# restart all OSDs using the tasks "start ceph osd" or
+# "restart containerized ceph osd"
 - name: copy osd restart script
   template:
     src: restart_osd_daemon.sh.j2
@@ -74,6 +77,7 @@
   listen: "restart ceph osds"
   when:
     - osd_group_name in group_names
+    - not rolling_update
 
 - name: restart ceph osds daemon(s) - non container
   command: /usr/bin/env bash /tmp/restart_osd_daemon.sh
@@ -81,6 +85,7 @@
   when:
     - osd_group_name in group_names
     - not containerized_deployment
+    - not rolling_update
 # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
 # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
     - osd_socket_stat.rc == 0
@@ -99,6 +104,7 @@
 # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
     - osd_group_name in group_names
     - containerized_deployment
+    - not rolling_update
     - ceph_osd_container_stat.get('rc') == 0
     - inventory_hostname == groups.get(osd_group_name) | last
     - ceph_osd_container_stat.get('stdout_lines', [])|length != 0
-- 
2.39.5
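
Note: for readers unfamiliar with the guard pattern this patch adds, here is a
minimal, hypothetical sketch of the resulting handler. It assumes
rolling_update defaults to false somewhere in the role's defaults and that the
rolling_update playbook overrides it to true (for example via an extra var);
the task body mirrors the non-container handler touched by the diff above, and
everything outside the diff is an assumption, not the actual file contents:

  # Assumed default, e.g. in roles/ceph-defaults/defaults/main.yml:
  #   rolling_update: false
  #
  # Assumed override when running the update playbook:
  #   ansible-playbook rolling_update.yml -e rolling_update=true

  - name: restart ceph osds daemon(s) - non container
    command: /usr/bin/env bash /tmp/restart_osd_daemon.sh
    listen: "restart ceph osds"
    when:
      - osd_group_name in group_names
      - not containerized_deployment
      # Skipped during a rolling update; the playbook's own "start ceph osd"
      # task restarts all OSDs on the host in one operation instead.
      - not rolling_update

With this guard, a normal config-change run still restarts OSDs through the
handler, while a rolling update leaves the restart entirely to the playbook,
avoiding the double restart described in the commit message.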