git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
misc.wait_until_osds_up(): timeout after 5min 1033/head
author: Zack Cerza <zack@redhat.com>
Fri, 24 Feb 2017 00:36:05 +0000 (17:36 -0700)
committer: Zack Cerza <zack@redhat.com>
Fri, 24 Feb 2017 00:36:05 +0000 (17:36 -0700)
It doesn't make any sense to wait more than a few minutes for OSDs to
come up. If they take more than five minutes, fail the job.

Signed-off-by: Zack Cerza <zack@redhat.com>
teuthology/misc.py

index b9b5baa0313a3e7d7c82cc439a73d496135826b8..35d9f0e00e2a0e9615e5c37a03f5a9b019fa079b 100644 (file)
@@ -910,26 +910,26 @@ def wait_until_osds_up(ctx, cluster, remote, ceph_cluster='ceph'):
     """Wait until all Ceph OSDs are booted."""
     num_osds = num_instances_of_type(cluster, 'osd', ceph_cluster)
     testdir = get_testdir(ctx)
-    while True:
-        r = remote.run(
-            args=[
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'ceph',
-                '--cluster', ceph_cluster,
-                'osd', 'dump', '--format=json'
-            ],
-            stdout=StringIO(),
-            logger=log.getChild('health'),
-        )
-        out = r.stdout.getvalue()
-        j = json.loads('\n'.join(out.split('\n')[1:]))
-        up = len(filter(lambda o: 'up' in o['state'], j['osds']))
-        log.debug('%d of %d OSDs are up' % (up, num_osds))
-        if up == num_osds:
-            break
-        time.sleep(1)
+    with safe_while(sleep=6, tries=50) as proceed:
+        while proceed():
+            r = remote.run(
+                args=[
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    '{tdir}/archive/coverage'.format(tdir=testdir),
+                    'ceph',
+                    '--cluster', ceph_cluster,
+                    'osd', 'dump', '--format=json'
+                ],
+                stdout=StringIO(),
+                logger=log.getChild('health'),
+            )
+            out = r.stdout.getvalue()
+            j = json.loads('\n'.join(out.split('\n')[1:]))
+            up = len(filter(lambda o: 'up' in o['state'], j['osds']))
+            log.debug('%d of %d OSDs are up' % (up, num_osds))
+            if up == num_osds:
+                break
 
 
 def reboot(node, timeout=300, interval=30):