From: Zack Cerza
Date: Fri, 24 Feb 2017 00:36:05 +0000 (-0700)
Subject: misc.wait_until_osds_up(): timeout after 5min
X-Git-Tag: 1.1.0~458^2
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F1033%2Fhead;p=teuthology.git

misc.wait_until_osds_up(): timeout after 5min

It doesn't make any sense to wait more than a few minutes for OSDs to
come up. If they take more than five minutes, fail the job.

Signed-off-by: Zack Cerza
---

diff --git a/teuthology/misc.py b/teuthology/misc.py
index b9b5baa031..35d9f0e00e 100644
--- a/teuthology/misc.py
+++ b/teuthology/misc.py
@@ -910,26 +910,26 @@ def wait_until_osds_up(ctx, cluster, remote, ceph_cluster='ceph'):
     """Wait until all Ceph OSDs are booted."""
     num_osds = num_instances_of_type(cluster, 'osd', ceph_cluster)
     testdir = get_testdir(ctx)
-    while True:
-        r = remote.run(
-            args=[
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'ceph',
-                '--cluster', ceph_cluster,
-                'osd', 'dump', '--format=json'
-            ],
-            stdout=StringIO(),
-            logger=log.getChild('health'),
-        )
-        out = r.stdout.getvalue()
-        j = json.loads('\n'.join(out.split('\n')[1:]))
-        up = len(filter(lambda o: 'up' in o['state'], j['osds']))
-        log.debug('%d of %d OSDs are up' % (up, num_osds))
-        if up == num_osds:
-            break
-        time.sleep(1)
+    with safe_while(sleep=6, tries=50) as proceed:
+        while proceed():
+            r = remote.run(
+                args=[
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    '{tdir}/archive/coverage'.format(tdir=testdir),
+                    'ceph',
+                    '--cluster', ceph_cluster,
+                    'osd', 'dump', '--format=json'
+                ],
+                stdout=StringIO(),
+                logger=log.getChild('health'),
+            )
+            out = r.stdout.getvalue()
+            j = json.loads('\n'.join(out.split('\n')[1:]))
+            up = len(filter(lambda o: 'up' in o['state'], j['osds']))
+            log.debug('%d of %d OSDs are up' % (up, num_osds))
+            if up == num_osds:
+                break
 
 
 def reboot(node, timeout=300, interval=30):