qa/tasks/ceph_test_case.py: update health check helpers
author    Sage Weil <sage@redhat.com>    Tue, 11 Jul 2017 03:39:31 +0000 (23:39 -0400)
committer Sage Weil <sage@redhat.com>    Wed, 12 Jul 2017 16:52:03 +0000 (12:52 -0400)
Signed-off-by: Sage Weil <sage@redhat.com>
qa/tasks/ceph_test_case.py
qa/tasks/cephfs/test_auto_repair.py
qa/tasks/cephfs/test_client_limits.py
qa/tasks/cephfs/test_failover.py

diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py
index 270c18553edbbfdf9e30139bbb9d0ccc454704d5..47f3921347dbd980ed0050d7576af5e917e1a23b 100644
--- a/qa/tasks/ceph_test_case.py
+++ b/qa/tasks/ceph_test_case.py
@@ -83,7 +83,8 @@ class CephTestCase(unittest.TestCase):
         """
         def seen_health_warning():
             health = self.ceph_cluster.mon_manager.get_mon_health()
-            summary_strings = [s['summary'] for s in health['summary']]
+            codes = [s for s in health['checks']]
+            summary_strings = [s[1]['message'] for s in health['checks'].iteritems()]
             if len(summary_strings) == 0:
                 log.debug("Not expected number of summary strings ({0})".format(summary_strings))
                 return False
@@ -91,6 +92,8 @@ class CephTestCase(unittest.TestCase):
                 for ss in summary_strings:
                     if pattern in ss:
                          return True
+                if pattern in codes:
+                    return True
 
             log.debug("Not found expected summary strings yet ({0})".format(summary_strings))
             return False
@@ -103,7 +106,7 @@ class CephTestCase(unittest.TestCase):
         """
         def is_clear():
             health = self.ceph_cluster.mon_manager.get_mon_health()
-            return len(health['summary']) == 0
+            return len(health['checks']) == 0
 
         self.wait_until_true(is_clear, timeout)
 
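For context, this helper change tracks the new health reporting format: health['checks'] is a mapping from stable check codes (e.g. MDS_READ_ONLY) to per-check details, replacing the old health['summary'] list of free-form strings. Below is a minimal sketch of the shape the patched helper assumes; only 'checks' and 'message' are actually relied on here, and any other fields shown are illustrative rather than guaranteed by this commit:

    # Illustrative payload from get_mon_health() under the new format.
    health = {
        "status": "HEALTH_WARN",               # assumed extra field
        "checks": {
            "MDS_INSUFFICIENT_STANDBY": {
                "severity": "HEALTH_WARN",     # assumed extra field
                "message": "insufficient standby MDS daemons available",
            },
        },
    }

    codes = [s for s in health['checks']]
    # -> ['MDS_INSUFFICIENT_STANDBY']
    summary_strings = [s[1]['message'] for s in health['checks'].iteritems()]
    # -> ['insufficient standby MDS daemons available']  (Python 2)

With this shape, wait_for_health() can match either a human-readable message or a stable check code, which is what the test changes below switch to.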
diff --git a/qa/tasks/cephfs/test_auto_repair.py b/qa/tasks/cephfs/test_auto_repair.py
index 033d8dde902c62df507dd4099c879ec76a5139a0..c0aa2e4c70fae8e085b28a900cd635e8beb86249 100644
--- a/qa/tasks/cephfs/test_auto_repair.py
+++ b/qa/tasks/cephfs/test_auto_repair.py
@@ -81,7 +81,7 @@ class TestMDSAutoRepair(CephFSTestCase):
         self.assertTrue(writer.finished)
 
         # The MDS should report its readonly health state to the mon
-        self.wait_for_health("MDS in read-only mode", timeout=30)
+        self.wait_for_health("MDS_READ_ONLY", timeout=30)
 
         # restart mds to make it writable
         self.fs.mds_fail_restart()
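As a side note, since the tests now match stable check codes rather than message substrings, a quick way to see which codes a live cluster is raising (a hypothetical debugging aid, not part of this commit) is to dump the raw health report:

    import json

    # 'ceph health --format=json' returns the same 'checks' mapping the
    # helper above parses; list its keys to see the active check codes.
    health = json.loads(
        self.fs.mon_manager.raw_cluster_cmd('health', '--format=json'))
    log.info("active health checks: {0}".format(list(health.get('checks', {}))))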
diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py
index f25cb4a21f33bf6213a75e74f40144f68be56d51..d8675fdad8b16eebed38ce98308929003838e374 100644
--- a/qa/tasks/cephfs/test_client_limits.py
+++ b/qa/tasks/cephfs/test_client_limits.py
@@ -62,12 +62,12 @@ class TestClientLimits(CephFSTestCase):
         # MDS should not be happy about that, as the client is failing to comply
         # with the SESSION_RECALL messages it is being sent
         mds_recall_state_timeout = int(self.fs.get_config("mds_recall_state_timeout"))
-        self.wait_for_health("failing to respond to cache pressure",
+        self.wait_for_health("MDS_HEALTH_CLIENT_RECALL",
                 mds_recall_state_timeout + 10)
 
         # We can also test that the MDS health warning for oversized
         # cache is functioning as intended.
-        self.wait_for_health("Too many inodes in cache",
+        self.wait_for_health("MDS_CACHE_OVERSIZED",
                 mds_recall_state_timeout + 10)
 
         # When the client closes the files, it should retain only as many caps as allowed
@@ -123,7 +123,7 @@ class TestClientLimits(CephFSTestCase):
         # After mds_revoke_cap_timeout, we should see a health warning (extra lag from
         # MDS beacon period)
         mds_revoke_cap_timeout = int(self.fs.get_config("mds_revoke_cap_timeout"))
-        self.wait_for_health("failing to respond to capability release", mds_revoke_cap_timeout + 10)
+        self.wait_for_health("MDS_CLIENT_RECALL", mds_revoke_cap_timeout + 10)
 
         # Client B should still be stuck
         self.assertFalse(rproc.finished)
@@ -163,7 +163,7 @@ class TestClientLimits(CephFSTestCase):
         self.mount_a.create_n_files("testdir/file2", 5, True)
 
         # Wait for the health warnings. Assume mds can handle 10 request per second at least
-        self.wait_for_health("failing to advance its oldest client/flush tid", max_requests / 10)
+        self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests / 10)
 
     def _test_client_cache_size(self, mount_subdir):
         """
diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py
index 1bb2ff7322b339b5223d882009211e56b69d1049..53c2d5e301e7d0478aa1c3ca88cac58cbbc936bf 100644
--- a/qa/tasks/cephfs/test_failover.py
+++ b/qa/tasks/cephfs/test_failover.py
@@ -112,7 +112,7 @@ class TestFailover(CephFSTestCase):
         victim = standbys.pop()
         self.fs.mds_stop(victim)
         log.info("waiting for insufficient standby daemon warning")
-        self.wait_for_health("insufficient standby daemons available", grace*2)
+        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
 
         # restart the standby, see that he becomes a standby, check health clears
         self.fs.mds_restart(victim)
@@ -127,7 +127,7 @@ class TestFailover(CephFSTestCase):
         self.assertGreaterEqual(len(standbys), 1)
         self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1))
         log.info("waiting for insufficient standby daemon warning")
-        self.wait_for_health("insufficient standby daemons available", grace*2)
+        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
 
         # Set it to 0
         self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0')
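One portability caveat: the patched helper iterates with dict.iteritems(), which exists only under Python 2 (the qa environment at the time). A hypothetical Python 3 compatible rendering of the same logic, assuming the same get_mon_health() payload sketched above, would be:

    def seen_health_warning():
        health = self.ceph_cluster.mon_manager.get_mon_health()
        # items()/values() replace the Python-2-only iteritems();
        # the matching behaviour is otherwise identical.
        codes = list(health['checks'])
        summary_strings = [c['message'] for c in health['checks'].values()]
        if len(summary_strings) == 0:
            log.debug("Not expected number of summary strings ({0})".format(summary_strings))
            return False
        else:
            for ss in summary_strings:
                if pattern in ss:
                    return True
            if pattern in codes:
                return True

        log.debug("Not found expected summary strings yet ({0})".format(summary_strings))
        return False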