]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa: add file sync stuck test support
author Xiubo Li <xiubli@redhat.com>
Tue, 12 Apr 2022 11:40:02 +0000 (19:40 +0800)
committer Xiubo Li <xiubli@redhat.com>
Thu, 2 Jun 2022 04:41:03 +0000 (12:41 +0800)
This will test the file sync of a directory, which may be stuck for
at most 5 seconds. This is because the related code waits for all
the unsafe requests to get safe replies from the MDSes, but the MDSes
consider it unnecessary to flush the mdlog immediately after an
early reply, so the mdlog is only flushed every 5 seconds in the
tick thread.

This should have been fixed in kclient and libcephfs by triggering
an mdlog flush before waiting for the requests' safe replies.

Fixes: https://tracker.ceph.com/issues/55283
Signed-off-by: Xiubo Li <xiubli@redhat.com>
(cherry picked from commit 3db3b4e2a4b853192c5b30c9594947ba45f96e03)

qa/tasks/cephfs/test_misc.py

index 921ae8166bf5a8b15375cff4fbcabd4fce5402f8..5fd6b94117399c97187667070e0b6877dab038a7 100644 (file)
@@ -3,6 +3,7 @@ from io import StringIO
 from tasks.cephfs.fuse_mount import FuseMount
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
 from teuthology.exceptions import CommandFailedError
+from textwrap import dedent
 import errno
 import platform
 import time
@@ -236,35 +237,62 @@ class TestMisc(CephFSTestCase):
         self.assertEqual(lsflags["allow_multimds_snaps"], True)
         self.assertEqual(lsflags["allow_standby_replay"], True)
 
-    def test_filesystem_sync_stuck_for_around_5s(self):
-        """
-        To check whether the fsync will be stuck to wait for the mdlog to be
-        flushed for at most 5 seconds.
-        """
-
-        dir_path = "fsync_do_not_wait_mdlog_testdir"
+    def _test_sync_stuck_for_around_5s(self, dir_path, file_sync=False):
         self.mount_a.run_shell(["mkdir", dir_path])
 
+        sync_dir_pyscript = dedent("""
+                import os
+
+                path = "{path}"
+                dfd = os.open(path, os.O_DIRECTORY)
+                os.fsync(dfd)
+                os.close(dfd)
+            """.format(path=dir_path))
+
         # run create/delete directories and test the sync time duration
         for i in range(300):
             for j in range(5):
                 self.mount_a.run_shell(["mkdir", os.path.join(dir_path, f"{i}_{j}")])
             start = time.time()
-            self.mount_a.run_shell(["sync"])
+            if file_sync:
+                self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript])
+            else:
+                self.mount_a.run_shell(["sync"])
             duration = time.time() - start
-            log.info(f"mkdir i = {i}, duration = {duration}")
+            log.info(f"sync mkdir i = {i}, duration = {duration}")
             self.assertLess(duration, 4)
 
             for j in range(5):
                 self.mount_a.run_shell(["rm", "-rf", os.path.join(dir_path, f"{i}_{j}")])
             start = time.time()
-            self.mount_a.run_shell(["sync"])
+            if file_sync:
+                self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript])
+            else:
+                self.mount_a.run_shell(["sync"])
             duration = time.time() - start
-            log.info(f"rmdir i = {i}, duration = {duration}")
+            log.info(f"sync rmdir i = {i}, duration = {duration}")
             self.assertLess(duration, 4)
 
         self.mount_a.run_shell(["rm", "-rf", dir_path])
 
+    def test_filesystem_sync_stuck_for_around_5s(self):
+        """
+        To check whether the filesystem sync will be stuck to wait for the
+        mdlog to be flushed for at most 5 seconds.
+        """
+
+        dir_path = "filesystem_sync_do_not_wait_mdlog_testdir"
+        self._test_sync_stuck_for_around_5s(dir_path)
+
+    def test_file_sync_stuck_for_around_5s(self):
+        """
+        To check whether the fsync will be stuck to wait for the mdlog to be
+        flushed for at most 5 seconds.
+        """
+
+        dir_path = "file_sync_do_not_wait_mdlog_testdir"
+        self._test_sync_stuck_for_around_5s(dir_path, True)
+
 
 class TestCacheDrop(CephFSTestCase):
     CLIENTS_REQUIRED = 1