git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa: add filesystem sync stuck test support
author: Xiubo Li <xiubli@redhat.com>
Tue, 12 Apr 2022 04:37:13 +0000 (12:37 +0800)
committer: Xiubo Li <xiubli@redhat.com>
Thu, 2 Jun 2022 04:40:17 +0000 (12:40 +0800)
This will test the sync of the filesystem, which may be stuck for
up to 5 seconds. This is because the related code waits for all
the unsafe requests to get a safe reply from the MDSes, but the
MDSes consider it unnecessary to flush the mdlog immediately
after an early reply, so the mdlog is only flushed every 5 seconds
in the tick thread.

This should have been fixed in kclient and libcephfs by triggering
an mdlog flush before waiting for the requests' safe replies.

Fixes: https://tracker.ceph.com/issues/55283
Signed-off-by: Xiubo Li <xiubli@redhat.com>
(cherry picked from commit b6fc5480f6ba6352fa72062e1376d0dd6b9074cd)

qa/tasks/cephfs/test_misc.py

index 2e5387aecf7d2c14561188ed3461b0b7c13b8d44..921ae8166bf5a8b15375cff4fbcabd4fce5402f8 100644 (file)
@@ -8,6 +8,7 @@ import platform
 import time
 import json
 import logging
+import os
 
 log = logging.getLogger(__name__)
 
@@ -235,6 +236,36 @@ class TestMisc(CephFSTestCase):
         self.assertEqual(lsflags["allow_multimds_snaps"], True)
         self.assertEqual(lsflags["allow_standby_replay"], True)
 
+    def test_filesystem_sync_stuck_for_around_5s(self):
+        """
+        Check that a sync after each mkdir/rm batch finishes in under 4
+        seconds, i.e. does not wait for the 5-second periodic mdlog flush.
+        """
+
+        dir_path = "fsync_do_not_wait_mdlog_testdir"
+        self.mount_a.run_shell(["mkdir", dir_path])
+
+        # create, then delete, directories and time the sync after each batch
+        for i in range(300):
+            for j in range(5):
+                self.mount_a.run_shell(["mkdir", os.path.join(dir_path, f"{i}_{j}")])
+            start = time.time()
+            self.mount_a.run_shell(["sync"])
+            duration = time.time() - start
+            log.info(f"mkdir i = {i}, duration = {duration}")
+            self.assertLess(duration, 4)
+
+            for j in range(5):
+                self.mount_a.run_shell(["rm", "-rf", os.path.join(dir_path, f"{i}_{j}")])
+            start = time.time()
+            self.mount_a.run_shell(["sync"])
+            duration = time.time() - start
+            log.info(f"rmdir i = {i}, duration = {duration}")
+            self.assertLess(duration, 4)
+
+        self.mount_a.run_shell(["rm", "-rf", dir_path])
+
+
 class TestCacheDrop(CephFSTestCase):
     CLIENTS_REQUIRED = 1