From d20733b11e96d71be8240c1c9899cd8afaad9aba Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Fri, 11 Dec 2015 17:00:11 -0800 Subject: [PATCH] buildpackages: catch VM instances in ERROR state. It is possible for 'server create' to return success, but then NOT have a good VM afterwards. The instance will be in state ERROR. Signed-off-by: Robin H. Johnson (cherry picked from commit 785801c2e9fec3fe81b0a1759d137367c9716dcb) --- tasks/buildpackages/Makefile | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tasks/buildpackages/Makefile b/tasks/buildpackages/Makefile index bb508f68ed0da..3fb269c22f28b 100644 --- a/tasks/buildpackages/Makefile +++ b/tasks/buildpackages/Makefile @@ -49,12 +49,24 @@ ${PKG_REPO}: flock --close ${D}/flock-$@.lock ${MAKE} flock-$@ touch ${D}/$@ +# Just because 'server create' returns success does not mean it actually succeeded! +# Check the server status before we proceed. +# If it's a weird status, bail out and let the delete fire +# e.g.: ERROR status can happen if there is no VM host with enough capacity for the request. 
ceph-${CEPH_PKG_TYPE}-${CEPH_DIST}-${CEPH_ARCH}-${CEPH_FLAVOR}-${CEPH_SHA1}: ${PKG_REPO} openstack server create --image 'teuthology-${CEPH_OS_TYPE}-${CEPH_OS_VERSION}-${CEPH_ARCH}' ${OPENSTACK_NETWORK} --flavor ${BUILD_FLAVOR} --key-name teuthology --security-group teuthology --property ownedby=${MY_IP} --user-data ${CEPH_OS_TYPE}-${CEPH_OS_VERSION}-user-data.txt --wait $@ - sleep 30 set -ex ; \ trap "openstack server delete --wait $@" EXIT ; \ + for delay in 30 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 ; do \ + status=$$(openstack server show -c status -f value $@) ; \ + case $$status in \ + ACTIVE) break ;; \ + NOSTATE|*BUILD|*BOOT|*RESIZE) sleep $$delay ;; \ + *) exit 1 ;; \ + esac ; \ + done ; \ ip=$(call get_ip,$@) ; \ + test -n "$$ip" || exit ; \ for delay in 1 2 4 8 8 8 8 8 8 8 8 8 16 16 16 16 16 32 32 32 64 128 256 512 ; do if ssh -o 'ConnectTimeout=3' $$ip bash -c '"grep -q READYTORUN /var/log/cloud-init*.log"' ; then break ; else sleep $$delay ; fi ; done ; \ scp make-${CEPH_PKG_TYPE}.sh common.sh ubuntu@$$ip: ; \ packages_repository=$(call get_ip,${