qa/tasks: Enhance wait_until_true() to check & retry recovery progress

author Sridhar Seshasayee <sseshasa@redhat.com>
Wed, 19 May 2021 15:22:15 +0000 (20:52 +0530)
committer Sridhar Seshasayee <sseshasa@redhat.com>
Wed, 2 Jun 2021 08:49:48 +0000 (14:19 +0530)
diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py

index 7f1dccc986d9c911f65e3c262238c1b20ec2c573..2ca17b34ce37b007432bc52c6786628b04468606 100644 (file)
--- a/qa/tasks/ceph_test_case.py
+++ b/qa/tasks/ceph_test_case.py
@@ -192,16 +192,22 @@ class CephTestCase(unittest.TestCase):
          log.debug("wait_until_equal: success")
  
      @classmethod
-    def wait_until_true(cls, condition, timeout, period=5):
+    def wait_until_true(cls, condition, timeout, check_fn=None, period=5):
          elapsed = 0
+        retry_count = 0
          while True:
              if condition():
-                log.debug("wait_until_true: success in {0}s".format(elapsed))
+                log.debug("wait_until_true: success in {0}s and {1} retries".format(elapsed, retry_count))
                  return
              else:
                  if elapsed >= timeout:
-                    raise TestTimeoutError("Timed out after {0}s".format(elapsed))
+                    if check_fn and check_fn() and retry_count < 5:
+                        elapsed = 0
+                        retry_count += 1
+                        log.debug("wait_until_true: making progress, waiting (timeout={0} retry_count={1})...".format(timeout, retry_count))
+                    else:
+                        raise TestTimeoutError("Timed out after {0}s and {1} retries".format(elapsed, retry_count))
                  else:
-                    log.debug("wait_until_true: waiting (timeout={0})...".format(timeout))
+                    log.debug("wait_until_true: waiting (timeout={0} retry_count={1})...".format(timeout, retry_count))
                  time.sleep(period)
                  elapsed += period
diff --git a/qa/tasks/mgr/test_progress.py b/qa/tasks/mgr/test_progress.py

index 0e03c2b663acfeba31071147fad9630714c5ff63..cf992e22d5cb90c2559bf9f2df31536b805cd7a6 100644 (file)
--- a/qa/tasks/mgr/test_progress.py
+++ b/qa/tasks/mgr/test_progress.py
@@ -243,6 +243,13 @@ class TestProgress(MgrTestCase):
              assert ev_id in live_ids
              return False
  
+    def _is_inprogress_or_complete(self, ev_id):
+        for ev in self._events_in_progress():
+            if ev['id'] == ev_id:
+                return ev['progress'] > 0
+        # check if the event completed
+        return self._is_complete(ev_id)
+
      def tearDown(self):
          if self.POOL in self.mgr_cluster.mon_manager.pools:
              self.mgr_cluster.mon_manager.remove_pool(self.POOL)
@@ -396,5 +403,6 @@ class TestProgress(MgrTestCase):
          log.info(json.dumps(ev1, indent=1))
  
          self.wait_until_true(lambda: self._is_complete(ev1['id']),
+                             check_fn=lambda: self._is_inprogress_or_complete(ev1['id']),
                               timeout=self.RECOVERY_PERIOD)
          self.assertTrue(self._is_quiet())
author	Sridhar Seshasayee <sseshasa@redhat.com>
	Wed, 19 May 2021 15:22:15 +0000 (20:52 +0530)
committer	Sridhar Seshasayee <sseshasa@redhat.com>
	Wed, 2 Jun 2021 08:49:48 +0000 (14:19 +0530)
qa/tasks/ceph_test_case.py		patch | blob | history
qa/tasks/mgr/test_progress.py		patch | blob | history