From: Xiubo Li Date: Tue, 12 Apr 2022 11:40:02 +0000 (+0800) Subject: qa: add file sync stuck test support X-Git-Tag: v17.2.1~10^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=fc7c9f6f50522485381516d988bfe10ab46496c5;p=ceph.git qa: add file sync stuck test support This will test the file sync of a directory, which may be stuck for at most 5 seconds. This was because the related code will wait for all the unsafe requests to get safe reply from MDSes, but the MDSes just think that it's unnecessary to flush the mdlog immediately after early reply, and the mdlog will be flushed every 5 seconds in the tick thread. This should have been fixed in kclient and libcephfs by triggering mdlog flush before waiting requests' safe reply. Fixes: https://tracker.ceph.com/issues/55283 Signed-off-by: Xiubo Li (cherry picked from commit 3db3b4e2a4b853192c5b30c9594947ba45f96e03) --- diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py index 921ae8166bf5..5fd6b9411739 100644 --- a/qa/tasks/cephfs/test_misc.py +++ b/qa/tasks/cephfs/test_misc.py @@ -3,6 +3,7 @@ from io import StringIO from tasks.cephfs.fuse_mount import FuseMount from tasks.cephfs.cephfs_test_case import CephFSTestCase from teuthology.exceptions import CommandFailedError +from textwrap import dedent import errno import platform import time @@ -236,35 +237,62 @@ class TestMisc(CephFSTestCase): self.assertEqual(lsflags["allow_multimds_snaps"], True) self.assertEqual(lsflags["allow_standby_replay"], True) - def test_filesystem_sync_stuck_for_around_5s(self): - """ - To check whether the fsync will be stuck to wait for the mdlog to be - flushed for at most 5 seconds. 
- """ - - dir_path = "fsync_do_not_wait_mdlog_testdir" + def _test_sync_stuck_for_around_5s(self, dir_path, file_sync=False): self.mount_a.run_shell(["mkdir", dir_path]) + sync_dir_pyscript = dedent(""" + import os + + path = "{path}" + dfd = os.open(path, os.O_DIRECTORY) + os.fsync(dfd) + os.close(dfd) + """.format(path=dir_path)) + # run create/delete directories and test the sync time duration for i in range(300): for j in range(5): self.mount_a.run_shell(["mkdir", os.path.join(dir_path, f"{i}_{j}")]) start = time.time() - self.mount_a.run_shell(["sync"]) + if file_sync: + self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript]) + else: + self.mount_a.run_shell(["sync"]) duration = time.time() - start - log.info(f"mkdir i = {i}, duration = {duration}") + log.info(f"sync mkdir i = {i}, duration = {duration}") self.assertLess(duration, 4) for j in range(5): self.mount_a.run_shell(["rm", "-rf", os.path.join(dir_path, f"{i}_{j}")]) start = time.time() - self.mount_a.run_shell(["sync"]) + if file_sync: + self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript]) + else: + self.mount_a.run_shell(["sync"]) duration = time.time() - start - log.info(f"rmdir i = {i}, duration = {duration}") + log.info(f"sync rmdir i = {i}, duration = {duration}") self.assertLess(duration, 4) self.mount_a.run_shell(["rm", "-rf", dir_path]) + def test_filesystem_sync_stuck_for_around_5s(self): + """ + To check whether the fsync will be stuck to wait for the mdlog to be + flushed for at most 5 seconds. + """ + + dir_path = "filesystem_sync_do_not_wait_mdlog_testdir" + self._test_sync_stuck_for_around_5s(dir_path) + + def test_file_sync_stuck_for_around_5s(self): + """ + To check whether the filesystem sync will be stuck to wait for the + mdlog to be flushed for at most 5 seconds. + """ + + dir_path = "file_sync_do_not_wait_mdlog_testdir" + self._test_sync_stuck_for_around_5s(dir_path, True) + class TestCacheDrop(CephFSTestCase): CLIENTS_REQUIRED = 1