verify all osds start before checking health

author Sage Weil <sage@newdream.net>

Sun, 8 Jan 2012 23:14:18 +0000 (15:14 -0800)

committer Sage Weil <sage@newdream.net>

Wed, 11 Jan 2012 20:54:08 +0000 (12:54 -0800)
author Sage Weil <sage@newdream.net>
Sun, 8 Jan 2012 23:14:18 +0000 (15:14 -0800)
committer Sage Weil <sage@newdream.net>
Wed, 11 Jan 2012 20:54:08 +0000 (12:54 -0800)
diff --git a/teuthology/misc.py b/teuthology/misc.py

index 00c674f5e2425aec56b02bf1694506c02de9ab61..2b144c73b5c92be2c9f24e869a74d63123983afc 100644 (file)
--- a/teuthology/misc.py
+++ b/teuthology/misc.py
@@ -9,6 +9,7 @@ import time
  import urllib2
  import urlparse
  import yaml
+import json
  
  from .orchestra import run
  
@@ -286,6 +287,31 @@ def wait_until_healthy(remote):
              break
          time.sleep(1)
  
+def wait_until_osds_up(cluster, remote):
+    """Wait until all Ceph OSDs are booted."""
+    num_osds = num_instances_of_type(cluster, 'osd')
+    while True:
+        r = remote.run(
+            args=[
+                '/tmp/cephtest/enable-coredump',
+                '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
+                '/tmp/cephtest/archive/coverage',
+                '/tmp/cephtest/binary/usr/local/bin/ceph',
+                '-c', '/tmp/cephtest/ceph.conf',
+                '--concise',
+                'osd', 'dump', '--format=json'
+                ],
+            stdout=StringIO(),
+            logger=log.getChild('health'),
+            )
+        out = r.stdout.getvalue()
+        j = json.loads('\n'.join(out.split('\n')[1:]))
+        up = len(j['osds'])
+        log.debug('%d of %d OSDs are up' % (up, num_osds))
+        if up == num_osds:
+            break
+        time.sleep(1)
+
  def wait_until_fuse_mounted(remote, fuse, mountpoint):
      while True:
          proc = remote.run(
diff --git a/teuthology/task/ceph.py b/teuthology/task/ceph.py

index ffd7919c284bb7ed528608eba58c4ba513315e3a..931212650bfa7384d6930904dc71b1aa247b0a6c 100644 (file)
--- a/teuthology/task/ceph.py
+++ b/teuthology/task/ceph.py
@@ -904,6 +904,10 @@ def healthy(ctx, config):
      log.info('Waiting until ceph is healthy...')
      firstmon = teuthology.get_first_mon(ctx, config)
      (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
+    teuthology.wait_until_osds_up(
+        cluster=ctx.cluster,
+        remote=mon0_remote
+        )
      teuthology.wait_until_healthy(
          remote=mon0_remote,
          )
author	Sage Weil <sage@newdream.net>
	Sun, 8 Jan 2012 23:14:18 +0000 (15:14 -0800)
committer	Sage Weil <sage@newdream.net>
	Wed, 11 Jan 2012 20:54:08 +0000 (12:54 -0800)
teuthology/misc.py		patch | blob | history
teuthology/task/ceph.py		patch | blob | history