git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
Give up on wait_until_healthy() after 15min
author Zack Cerza <zack@cerza.org>
Fri, 7 Mar 2014 04:31:48 +0000 (22:31 -0600)
committer Zack Cerza <zack@cerza.org>
Fri, 7 Mar 2014 04:32:29 +0000 (22:32 -0600)
Signed-off-by: Zack Cerza <zack.cerza@inktank.com>
teuthology/misc.py

index 280cee4b287015ff1afcfef35c5ccbb120ee979f..092f5f55b268e1ac915464713aa69bd43243d2f5 100644 (file)
@@ -22,6 +22,7 @@ import re
 from teuthology import safepath
 from .orchestra import run
 from .config import config
+from .contextutil import safe_while
 
 log = logging.getLogger(__name__)
 
@@ -816,25 +817,29 @@ def get_scratch_devices(remote):
 
 
 def wait_until_healthy(ctx, remote):
-    """Wait until a Ceph cluster is healthy."""
+    """
+    Wait until a Ceph cluster is healthy. Give up after 15min.
+    """
     testdir = get_testdir(ctx)
-    while True:
-        r = remote.run(
-            args=[
-                'adjust-ulimits',
-                'ceph-coverage',
-                '{tdir}/archive/coverage'.format(tdir=testdir),
-                'ceph',
-                'health',
-                ],
-            stdout=StringIO(),
-            logger=log.getChild('health'),
-            )
-        out = r.stdout.getvalue()
-        log.debug('Ceph health: %s', out.rstrip('\n'))
-        if out.split(None, 1)[0] == 'HEALTH_OK':
-            break
-        time.sleep(1)
+    with safe_while(sleep=5, increment=0, tries=180) as timeout:
+        while True:
+            timeout()
+            r = remote.run(
+                args=[
+                    'adjust-ulimits',
+                    'ceph-coverage',
+                    '{tdir}/archive/coverage'.format(tdir=testdir),
+                    'ceph',
+                    'health',
+                    ],
+                stdout=StringIO(),
+                logger=log.getChild('health'),
+                )
+            out = r.stdout.getvalue()
+            log.debug('Ceph health: %s', out.rstrip('\n'))
+            if out.split(None, 1)[0] == 'HEALTH_OK':
+                break
+            time.sleep(1)
 
 
 def wait_until_osds_up(ctx, cluster, remote):