From: Xiubo Li Date: Tue, 12 Apr 2022 04:37:13 +0000 (+0800) Subject: qa: add filesystem sync stuck test support X-Git-Tag: v17.2.1~10^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4c157991a8a4d4b341d72bcd56254eff4f491142;p=ceph.git qa: add filesystem sync stuck test support This will test the sync of the filesystem, which may be stuck for at most 5 seconds. This is because the related code waits for all the unsafe requests to get a safe reply from the MDSes, but the MDSes consider it unnecessary to flush the mdlog immediately after an early reply, so the mdlog is only flushed every 5 seconds in the tick thread. This should have been fixed in kclient and libcephfs by triggering an mdlog flush before waiting for the requests' safe replies. Fixes: https://tracker.ceph.com/issues/55283 Signed-off-by: Xiubo Li (cherry picked from commit b6fc5480f6ba6352fa72062e1376d0dd6b9074cd) --- diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py index 2e5387aecf7..921ae8166bf 100644 --- a/qa/tasks/cephfs/test_misc.py +++ b/qa/tasks/cephfs/test_misc.py @@ -8,6 +8,7 @@ import platform import time import json import logging +import os log = logging.getLogger(__name__) @@ -235,6 +236,36 @@ class TestMisc(CephFSTestCase): self.assertEqual(lsflags["allow_multimds_snaps"], True) self.assertEqual(lsflags["allow_standby_replay"], True) + def test_filesystem_sync_stuck_for_around_5s(self): + """ + To check whether the fsync will be stuck to wait for the mdlog to be + flushed for at most 5 seconds. 
+ """ + + dir_path = "fsync_do_not_wait_mdlog_testdir" + self.mount_a.run_shell(["mkdir", dir_path]) + + # run create/delete directories and test the sync time duration + for i in range(300): + for j in range(5): + self.mount_a.run_shell(["mkdir", os.path.join(dir_path, f"{i}_{j}")]) + start = time.time() + self.mount_a.run_shell(["sync"]) + duration = time.time() - start + log.info(f"mkdir i = {i}, duration = {duration}") + self.assertLess(duration, 4) + + for j in range(5): + self.mount_a.run_shell(["rm", "-rf", os.path.join(dir_path, f"{i}_{j}")]) + start = time.time() + self.mount_a.run_shell(["sync"]) + duration = time.time() - start + log.info(f"rmdir i = {i}, duration = {duration}") + self.assertLess(duration, 4) + + self.mount_a.run_shell(["rm", "-rf", dir_path]) + + class TestCacheDrop(CephFSTestCase): CLIENTS_REQUIRED = 1