qa/tasks/ceph: wait for pg stats to flush in healthy check
author Sage Weil <sage@redhat.com>
Mon, 24 Jul 2017 21:05:11 +0000 (17:05 -0400)
committer Sage Weil <sage@redhat.com>
Thu, 27 Jul 2017 16:10:27 +0000 (12:10 -0400)
Signed-off-by: Sage Weil <sage@redhat.com>
qa/tasks/ceph.py
qa/tasks/ceph_manager.py

diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py
index 5fe19c1ed1f8a474da65a3769d9c054a70c11b11..05c4c7d90abf3eda87c8a7e81bedec39637e0715 100644
@@ -1222,7 +1222,13 @@ def healthy(ctx, config):
     """
     config = config if isinstance(config, dict) else dict()
     cluster_name = config.get('cluster', 'ceph')
-    log.info('Waiting until ceph cluster %s is healthy...', cluster_name)
+    log.info('Waiting until %s daemons up and pgs clean...', cluster_name)
+    manager = ctx.managers[cluster_name]
+    try:
+        manager.wait_for_mgr_available()
+    except run.CommandFailedError:
+        log.info('ignoring mgr wait error, probably testing upgrade')
+
     firstmon = teuthology.get_first_mon(ctx, config, cluster_name)
     (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
     teuthology.wait_until_osds_up(
@@ -1231,6 +1237,14 @@ def healthy(ctx, config):
         remote=mon0_remote,
         ceph_cluster=cluster_name,
     )
+
+    try:
+        manager.flush_all_pg_stats()
+    except run.CommandFailedError:
+        log.info('ignoring flush pg stats error, probably testing upgrade')
+    manager.wait_for_clean()
+
+    log.info('Waiting until ceph cluster %s is healthy...', cluster_name)
     teuthology.wait_until_healthy(
         ctx,
         remote=mon0_remote,
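
The new gate in healthy() first flushes outstanding PG stats (so the mon/mgr sees current PG state) and waits for every PG to report clean before the existing wait_until_healthy() call. A rough standalone approximation of that sequence, outside teuthology, is sketched below; it assumes the ceph CLI is on PATH, and the `ceph status` pgmap fields (num_pgs, pgs_by_state) reflect typical luminous-era output rather than anything this commit guarantees.

# Rough standalone approximation of the flush + wait-for-clean gate
# (a sketch, not the teuthology code).  Assumes the ceph CLI is in PATH.
import json
import subprocess
import time

def ceph(*args):
    return subprocess.check_output(('ceph',) + args)

def flush_all_pg_stats():
    # Ask every OSD to push its PG stats to the mon/mgr immediately.
    for osd in json.loads(ceph('osd', 'ls', '--format=json')):
        ceph('tell', 'osd.%d' % osd, 'flush_pg_stats')

def wait_for_clean(timeout=300, interval=3):
    start = time.time()
    while True:
        pgmap = json.loads(ceph('status', '--format=json'))['pgmap']
        clean = sum(s['count'] for s in pgmap.get('pgs_by_state', [])
                    if s['state_name'] == 'active+clean')
        if clean == pgmap['num_pgs']:
            return
        assert time.time() - start < timeout, 'timed out waiting for clean PGs'
        time.sleep(interval)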
diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py
index 757a7faf23af9fd257f281be6525a7e9b476aa88..b2f687e49bb2a89030d30a8286248f871402e3a3 100644
@@ -1989,6 +1989,10 @@ class CephManager:
         """
         return self.get_osd_dump_json()['osds']
 
+    def get_mgr_dump(self):
+        out = self.raw_cluster_cmd('mgr', 'dump', '--format=json')
+        return json.loads(out)
+
     def get_stuck_pgs(self, type_, threshold):
         """
         :returns: stuck pg information from the cluster
@@ -2182,6 +2186,20 @@ class CephManager:
         for pool in pools:
             self.wait_for_pool(pool)
 
+    def is_mgr_available(self):
+        x = self.get_mgr_dump()
+        return x.get('available', False)
+
+    def wait_for_mgr_available(self, timeout=None):
+        self.log("waiting for mgr available")
+        start = time.time()
+        while not self.is_mgr_available():
+            if timeout is not None:
+                assert time.time() - start < timeout, \
+                    'timeout expired in wait_for_mgr_available'
+            time.sleep(3)
+        self.log("mgr available!")
+
     def wait_for_recovery(self, timeout=None):
         """
         Check peering. When this exists, we have recovered.