qa: move wait_until_scrub_complete helper to filesystem class

author Xiubo Li <xiubli@redhat.com>

Wed, 23 Dec 2020 01:25:37 +0000 (09:25 +0800)

committer Xiubo Li <xiubli@redhat.com>

Mon, 15 Mar 2021 05:23:29 +0000 (13:23 +0800)
author Xiubo Li <xiubli@redhat.com>
Wed, 23 Dec 2020 01:25:37 +0000 (09:25 +0800)
committer Xiubo Li <xiubli@redhat.com>
Mon, 15 Mar 2021 05:23:29 +0000 (13:23 +0800)
diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py

index 9d689456c9de4a5521aed15e642b6ea5f1e23fa8..f72ca26c3daa199e0e25a9dae45b439e306b3522 100644 (file)
--- a/qa/tasks/cephfs/cephfs_test_case.py
+++ b/qa/tasks/cephfs/cephfs_test_case.py
@@ -13,9 +13,6 @@ from teuthology.misc import sudo_write_file
  from teuthology.orchestra import run
  from teuthology.orchestra.run import CommandFailedError
  
-from teuthology.contextutil import safe_while
-
-
  log = logging.getLogger(__name__)
  
  def for_teuthology(f):
@@ -381,13 +378,11 @@ class CephFSTestCase(CephTestCase):
          except contextutil.MaxWhileTries as e:
              raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e
  
-    def _wait_until_scrub_complete(self, path="/", recursive=True):
+    def _wait_until_scrub_complete(self, path="/", recursive=True, timeout=100):
          out_json = self.fs.rank_tell(["scrub", "start", path] + ["recursive"] if recursive else [])
-        with safe_while(sleep=10, tries=10) as proceed:
-            while proceed():
-                out_json = self.fs.rank_tell(["scrub", "status"])
-                if out_json['status'] == "no active scrubs running":
-                    break;
+        if not self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"],
+                                                 sleep=10, timeout=timeout):
+            log.info("timed out waiting for scrub to complete")
  
      def _wait_distributed_subtrees(self, count, status=None, rank=None, path=None):
          try:
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py

index 226eb02d1922d94462c9855c3f87c7464d014674..0dd9d5e048e65dbd4c40d6b6b488299325c01568 100644 (file)
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -18,6 +18,7 @@ from teuthology.exceptions import CommandFailedError
  from teuthology import misc
  from teuthology.nuke import clear_firewall
  from teuthology.parallel import parallel
+from teuthology import contextutil
  from tasks.ceph_manager import write_conf
  from tasks import ceph_manager
  
@@ -1575,3 +1576,26 @@ class Filesystem(MDSCluster):
          assert(new_max_mds < oldmax)
          self.set_max_mds(new_max_mds)
          return self.wait_for_daemons()
+
+    def wait_until_scrub_complete(self, result=None, tag=None, rank=0, sleep=30, timeout=300):
+        # time out after "timeout" seconds and assume as done
+        if result is None:
+            result = "no active scrubs running"
+        with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed:
+            while proceed():
+                out_json = self.rank_tell(["scrub", "status"], rank=rank)
+                assert out_json is not None
+                if result in out_json['status']:
+                    log.info("all active scrubs completed")
+                    return True
+
+                if tag is not None:
+                    status = out_json['scrubs'][tag]
+                    if status is not None:
+                        log.info(f"scrub status for tag:{tag} - {status}")
+                    else:
+                        log.info(f"scrub has completed for tag:{tag}")
+                        return True
+
+        # timed out waiting for scrub to complete
+        return False
diff --git a/qa/tasks/cephfs/test_scrub_checks.py b/qa/tasks/cephfs/test_scrub_checks.py

index 381dcbaf85e90bcc1292e41216c3e13ea1eb94ec..6463815a53e5de538dccaef6d092c03731e7e598 100644 (file)
--- a/qa/tasks/cephfs/test_scrub_checks.py
+++ b/qa/tasks/cephfs/test_scrub_checks.py
@@ -77,7 +77,7 @@ done
  
          # abort and verify
          self._abort_scrub(0)
-        self.wait_until_true(lambda: "no active" in self._get_scrub_status()['status'], 30)
+        self.fs.wait_until_scrub_complete(sleep=5, timeout=30)
  
          # sleep enough to fetch updated task status
          checked = self._check_task_status_na()
@@ -298,7 +298,7 @@ class TestScrubChecks(CephFSTestCase):
          self.assertFalse(_check_and_clear_damage(ino, "backtrace"));
          self.fs.rados(["rmxattr", rados_obj_name, "parent"], pool=self.fs.get_data_pool_name())
          self.tell_command(mds_rank, command, success_validator)
-        self.wait_until_true(lambda: "no active" in _get_scrub_status()['status'], 30)
+        self.fs.wait_until_scrub_complete(sleep=5, timeout=30)
          self.assertTrue(_check_and_clear_damage(ino, "backtrace"));
  
          command = "flush_path /"
diff --git a/qa/tasks/fwd_scrub.py b/qa/tasks/fwd_scrub.py

index 15393c209928e0a0d8ebd92a6c2125984aef6e99..c8866c1901a4ca30a642209b157b9500361254a7 100644 (file)
--- a/qa/tasks/fwd_scrub.py
+++ b/qa/tasks/fwd_scrub.py
@@ -8,8 +8,6 @@ from gevent import sleep, GreenletExit
  from gevent.greenlet import Greenlet
  from gevent.event import Event
  from teuthology import misc as teuthology
-from teuthology import contextutil
-from teuthology.orchestra.run import CommandFailedError
  
  from tasks import ceph_manager
  from tasks.cephfs.filesystem import MDSCluster, Filesystem
@@ -73,29 +71,8 @@ class ForwardScrubber(Thrasher, Greenlet):
          assert out_json['return_code'] == 0
          assert out_json['mode'] == 'asynchronous'
  
-        return self._wait_until_scrub_complete(tag)
-
-    def _wait_until_scrub_complete(self, tag):
-        # time out after scrub_timeout seconds and assume as done
-        with contextutil.safe_while(sleep=30, tries=self.scrub_timeout//30) as proceed:
-            while proceed():
-                try:
-                    out_json = self.fs.rank_tell(["scrub", "status"])
-                    assert out_json is not None
-                    if out_json['status'] == "no active scrubs running":
-                        self.logger.info("all active scrubs completed")
-                        return
-
-                    status = out_json['scrubs'][tag]
-                    if status is not None:
-                        self.logger.info(f"scrub status for tag:{tag} - {status}")
-                    else:
-                        self.logger.info(f"scrub has completed for tag:{tag}")
-                        return
-                except CommandFailedError as e:
-                    self.logger.exception(f"exception while getting scrub status: {e}")
-                    self.logger.info("retrying scrub status command in a while")
-                    pass
+        return self.fs.wait_until_scrub_complete(tag=tag, sleep=30,
+                                                 timeout=self.scrub_timeout)
  
  def stop_all_fwd_scrubbers(thrashers):
      for thrasher in thrashers:
author	Xiubo Li <xiubli@redhat.com>
	Wed, 23 Dec 2020 01:25:37 +0000 (09:25 +0800)
committer	Xiubo Li <xiubli@redhat.com>
	Mon, 15 Mar 2021 05:23:29 +0000 (13:23 +0800)
qa/tasks/cephfs/cephfs_test_case.py		patch | blob | history
qa/tasks/cephfs/filesystem.py		patch | blob | history
qa/tasks/cephfs/test_scrub_checks.py		patch | blob | history
qa/tasks/fwd_scrub.py		patch | blob | history