From 25717f7e84e2173e2d3bcc6e8886992bbb50b269 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 10 Jul 2017 23:39:31 -0400
Subject: [PATCH] qa/tasks/ceph_test_case.py: update health check helpers

Signed-off-by: Sage Weil
---
 qa/tasks/ceph_test_case.py            | 7 +++++--
 qa/tasks/cephfs/test_auto_repair.py   | 2 +-
 qa/tasks/cephfs/test_client_limits.py | 8 ++++----
 qa/tasks/cephfs/test_failover.py      | 4 ++--
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py
index 270c18553ed..47f3921347d 100644
--- a/qa/tasks/ceph_test_case.py
+++ b/qa/tasks/ceph_test_case.py
@@ -83,7 +83,8 @@ class CephTestCase(unittest.TestCase):
         """
         def seen_health_warning():
             health = self.ceph_cluster.mon_manager.get_mon_health()
-            summary_strings = [s['summary'] for s in health['summary']]
+            codes = [s for s in health['checks']]
+            summary_strings = [s[1]['message'] for s in health['checks'].iteritems()]
             if len(summary_strings) == 0:
                 log.debug("Not expected number of summary strings ({0})".format(summary_strings))
                 return False
@@ -91,6 +92,8 @@ class CephTestCase(unittest.TestCase):
             for ss in summary_strings:
                 if pattern in ss:
                     return True
+            if pattern in codes:
+                return True
 
             log.debug("Not found expected summary strings yet ({0})".format(summary_strings))
             return False
@@ -103,7 +106,7 @@ class CephTestCase(unittest.TestCase):
         """
         def is_clear():
             health = self.ceph_cluster.mon_manager.get_mon_health()
-            return len(health['summary']) == 0
+            return len(health['checks']) == 0
 
         self.wait_until_true(is_clear, timeout)
 
diff --git a/qa/tasks/cephfs/test_auto_repair.py b/qa/tasks/cephfs/test_auto_repair.py
index 033d8dde902..c0aa2e4c70f 100644
--- a/qa/tasks/cephfs/test_auto_repair.py
+++ b/qa/tasks/cephfs/test_auto_repair.py
@@ -81,7 +81,7 @@ class TestMDSAutoRepair(CephFSTestCase):
         self.assertTrue(writer.finished)
 
         # The MDS should report its readonly health state to the mon
-        self.wait_for_health("MDS in read-only mode", timeout=30)
+        self.wait_for_health("MDS_READ_ONLY", timeout=30)
 
         # restart mds to make it writable
         self.fs.mds_fail_restart()
diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py
index f25cb4a21f3..d8675fdad8b 100644
--- a/qa/tasks/cephfs/test_client_limits.py
+++ b/qa/tasks/cephfs/test_client_limits.py
@@ -62,12 +62,12 @@ class TestClientLimits(CephFSTestCase):
         # MDS should not be happy about that, as the client is failing to comply
         # with the SESSION_RECALL messages it is being sent
         mds_recall_state_timeout = int(self.fs.get_config("mds_recall_state_timeout"))
-        self.wait_for_health("failing to respond to cache pressure",
+        self.wait_for_health("MDS_HEALTH_CLIENT_RECALL",
                              mds_recall_state_timeout + 10)
 
         # We can also test that the MDS health warning for oversized
         # cache is functioning as intended.
-        self.wait_for_health("Too many inodes in cache",
+        self.wait_for_health("MDS_CACHE_OVERSIZED",
                              mds_recall_state_timeout + 10)
 
         # When the client closes the files, it should retain only as many caps as allowed
@@ -123,7 +123,7 @@ class TestClientLimits(CephFSTestCase):
         # After mds_revoke_cap_timeout, we should see a health warning (extra lag from
         # MDS beacon period)
         mds_revoke_cap_timeout = int(self.fs.get_config("mds_revoke_cap_timeout"))
-        self.wait_for_health("failing to respond to capability release", mds_revoke_cap_timeout + 10)
+        self.wait_for_health("MDS_CLIENT_RECALL", mds_revoke_cap_timeout + 10)
 
         # Client B should still be stuck
         self.assertFalse(rproc.finished)
@@ -163,7 +163,7 @@ class TestClientLimits(CephFSTestCase):
         self.mount_a.create_n_files("testdir/file2", 5, True)
 
         # Wait for the health warnings. Assume mds can handle 10 request per second at least
-        self.wait_for_health("failing to advance its oldest client/flush tid", max_requests / 10)
+        self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests / 10)
 
     def _test_client_cache_size(self, mount_subdir):
         """
diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py
index 1bb2ff7322b..53c2d5e301e 100644
--- a/qa/tasks/cephfs/test_failover.py
+++ b/qa/tasks/cephfs/test_failover.py
@@ -112,7 +112,7 @@ class TestFailover(CephFSTestCase):
         victim = standbys.pop()
         self.fs.mds_stop(victim)
         log.info("waiting for insufficient standby daemon warning")
-        self.wait_for_health("insufficient standby daemons available", grace*2)
+        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
 
         # restart the standby, see that he becomes a standby, check health clears
         self.fs.mds_restart(victim)
@@ -127,7 +127,7 @@ class TestFailover(CephFSTestCase):
         self.assertGreaterEqual(len(standbys), 1)
         self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1))
         log.info("waiting for insufficient standby daemon warning")
-        self.wait_for_health("insufficient standby daemons available", grace*2)
+        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
 
         # Set it to 0
         self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0')
-- 
2.39.5
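Note for reviewers: below is a minimal sketch of the structured health output that the updated wait_for_health() helper appears to be written against. The exact layout of health['checks'] (in particular whether the human-readable text sits directly under a 'message' key) is an assumption inferred from how the new code indexes it, not something confirmed by this patch; the check code and message text are illustrative only. The qa suite is Python 2 (hence .iteritems() in the patch); the sketch uses .items() so it runs on either version.

# Sketch (assumption): structured health output as the new helper indexes it.
# 'checks' maps a health-check code to a dict carrying a human-readable message.
health = {
    "status": "HEALTH_WARN",
    "checks": {
        "MDS_READ_ONLY": {                      # check code, matchable directly
            "severity": "HEALTH_WARN",
            "message": "1 MDSs are read only",  # illustrative message text
        },
    },
}

pattern = "MDS_READ_ONLY"

# Mirror of the matching logic in seen_health_warning(): a pattern can now hit
# either a per-check message or a check code, so tests can pass codes like
# "MDS_READ_ONLY" instead of the old free-form summary strings.
codes = [s for s in health['checks']]
summary_strings = [s[1]['message'] for s in health['checks'].items()]
matched = any(pattern in ss for ss in summary_strings) or pattern in codes
print(matched)  # True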