From: Christian Zunker Date: Mon, 12 Jun 2017 08:30:22 +0000 (+0000) Subject: Restart all OSDs and do not stop after the first one. X-Git-Tag: v2.3.0rc2~25^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F1595%2Fhead;p=ceph-ansible.git Restart all OSDs and do not stop after the first one. The current handler only restarts one OSD on each OSD server. After the first one the handler stops, no matter what results the checks had. Co-Authored-By: Gaudenz Steinlin (@gaudenz) --- diff --git a/roles/ceph-common/templates/restart_osd_daemon.sh.j2 b/roles/ceph-common/templates/restart_osd_daemon.sh.j2 index 5d1d04597..f027574cf 100644 --- a/roles/ceph-common/templates/restart_osd_daemon.sh.j2 +++ b/roles/ceph-common/templates/restart_osd_daemon.sh.j2 @@ -8,7 +8,7 @@ check_pgs() { while [ $RETRIES -ne 0 ]; do test "[""$(ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" == "$(ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')" RET=$? - test $RET -eq 0 && exit 0 + test $RET -eq 0 && return 0 sleep $DELAY let RETRIES=RETRIES-1 done @@ -28,7 +28,7 @@ for id in $(ls /var/lib/ceph/osd/ | sed 's/.*-//'); do # Wait and ensure the socket exists after restarting the daemon SOCKET=/var/run/ceph/{{ cluster }}-osd.${id}.asok while [ $COUNT -ne 0 ]; do - test -S $SOCKET && check_pgs + test -S $SOCKET && check_pgs && continue 2 sleep 1 let COUNT=COUNT-1 done