From 25717f7e84e2173e2d3bcc6e8886992bbb50b269 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 10 Jul 2017 23:39:31 -0400
Subject: [PATCH] qa/tasks/ceph_test_case.py: update health check helpers

Signed-off-by: Sage Weil
---
 qa/tasks/ceph_test_case.py            | 7 +++++--
 qa/tasks/cephfs/test_auto_repair.py   | 2 +-
 qa/tasks/cephfs/test_client_limits.py | 8 ++++----
 qa/tasks/cephfs/test_failover.py      | 4 ++--
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py
index 270c18553ed..47f3921347d 100644
--- a/qa/tasks/ceph_test_case.py
+++ b/qa/tasks/ceph_test_case.py
@@ -83,7 +83,8 @@ class CephTestCase(unittest.TestCase):
         """
         def seen_health_warning():
             health = self.ceph_cluster.mon_manager.get_mon_health()
-            summary_strings = [s['summary'] for s in health['summary']]
+            codes = [s for s in health['checks']]
+            summary_strings = [s[1]['message'] for s in health['checks'].iteritems()]
             if len(summary_strings) == 0:
                 log.debug("Not expected number of summary strings ({0})".format(summary_strings))
                 return False
@@ -91,6 +92,8 @@ class CephTestCase(unittest.TestCase):
             for ss in summary_strings:
                 if pattern in ss:
                     return True
+            if pattern in codes:
+                return True
 
             log.debug("Not found expected summary strings yet ({0})".format(summary_strings))
             return False
@@ -103,7 +106,7 @@ class CephTestCase(unittest.TestCase):
         """
         def is_clear():
             health = self.ceph_cluster.mon_manager.get_mon_health()
-            return len(health['summary']) == 0
+            return len(health['checks']) == 0
 
         self.wait_until_true(is_clear, timeout)
 
diff --git a/qa/tasks/cephfs/test_auto_repair.py b/qa/tasks/cephfs/test_auto_repair.py
index 033d8dde902..c0aa2e4c70f 100644
--- a/qa/tasks/cephfs/test_auto_repair.py
+++ b/qa/tasks/cephfs/test_auto_repair.py
@@ -81,7 +81,7 @@ class TestMDSAutoRepair(CephFSTestCase):
         self.assertTrue(writer.finished)
 
         # The MDS should report its readonly health state to the mon
-        self.wait_for_health("MDS in read-only mode", timeout=30)
+        self.wait_for_health("MDS_READ_ONLY", timeout=30)
 
         # restart mds to make it writable
         self.fs.mds_fail_restart()
diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py
index f25cb4a21f3..d8675fdad8b 100644
--- a/qa/tasks/cephfs/test_client_limits.py
+++ b/qa/tasks/cephfs/test_client_limits.py
@@ -62,12 +62,12 @@ class TestClientLimits(CephFSTestCase):
         # MDS should not be happy about that, as the client is failing to comply
         # with the SESSION_RECALL messages it is being sent
         mds_recall_state_timeout = int(self.fs.get_config("mds_recall_state_timeout"))
-        self.wait_for_health("failing to respond to cache pressure",
+        self.wait_for_health("MDS_HEALTH_CLIENT_RECALL",
                              mds_recall_state_timeout + 10)
 
         # We can also test that the MDS health warning for oversized
         # cache is functioning as intended.
-        self.wait_for_health("Too many inodes in cache",
+        self.wait_for_health("MDS_CACHE_OVERSIZED",
                              mds_recall_state_timeout + 10)
 
         # When the client closes the files, it should retain only as many caps as allowed
@@ -123,7 +123,7 @@ class TestClientLimits(CephFSTestCase):
         # After mds_revoke_cap_timeout, we should see a health warning (extra lag from
         # MDS beacon period)
         mds_revoke_cap_timeout = int(self.fs.get_config("mds_revoke_cap_timeout"))
-        self.wait_for_health("failing to respond to capability release", mds_revoke_cap_timeout + 10)
+        self.wait_for_health("MDS_CLIENT_RECALL", mds_revoke_cap_timeout + 10)
 
         # Client B should still be stuck
         self.assertFalse(rproc.finished)
@@ -163,7 +163,7 @@ class TestClientLimits(CephFSTestCase):
         self.mount_a.create_n_files("testdir/file2", 5, True)
 
         # Wait for the health warnings. Assume mds can handle 10 request per second at least
-        self.wait_for_health("failing to advance its oldest client/flush tid", max_requests / 10)
+        self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests / 10)
 
     def _test_client_cache_size(self, mount_subdir):
         """
diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py
index 1bb2ff7322b..53c2d5e301e 100644
--- a/qa/tasks/cephfs/test_failover.py
+++ b/qa/tasks/cephfs/test_failover.py
@@ -112,7 +112,7 @@ class TestFailover(CephFSTestCase):
         victim = standbys.pop()
         self.fs.mds_stop(victim)
         log.info("waiting for insufficient standby daemon warning")
-        self.wait_for_health("insufficient standby daemons available", grace*2)
+        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
 
         # restart the standby, see that he becomes a standby, check health clears
         self.fs.mds_restart(victim)
@@ -127,7 +127,7 @@ class TestFailover(CephFSTestCase):
         self.assertGreaterEqual(len(standbys), 1)
         self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1))
         log.info("waiting for insufficient standby daemon warning")
-        self.wait_for_health("insufficient standby daemons available", grace*2)
+        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
 
         # Set it to 0
         self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0')
-- 
2.39.5
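Note for reviewers: below is a minimal sketch of the structured health output that the updated wait_for_health() helper appears to be written against. The exact layout of health['checks'] (in particular whether the human-readable text sits directly under a 'message' key) is an assumption inferred from how the new code indexes it, not something confirmed by this patch; the check code and message text are illustrative only. The qa suite is Python 2 (hence .iteritems() in the patch); the sketch uses .items() so it runs on either version.

# Sketch (assumption): structured health output as the new helper indexes it.
# 'checks' maps a health-check code to a dict carrying a human-readable message.
health = {
    "status": "HEALTH_WARN",
    "checks": {
        "MDS_READ_ONLY": {                      # check code, matchable directly
            "severity": "HEALTH_WARN",
            "message": "1 MDSs are read only",  # illustrative message text
        },
    },
}

pattern = "MDS_READ_ONLY"

# Mirror of the matching logic in seen_health_warning(): a pattern can now hit
# either a per-check message or a check code, so tests can pass codes like
# "MDS_READ_ONLY" instead of the old free-form summary strings.
codes = [s for s in health['checks']]
summary_strings = [s[1]['message'] for s in health['checks'].items()]
matched = any(pattern in ss for ss in summary_strings) or pattern in codes
print(matched)  # True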