From: Patrick Donnelly Date: Thu, 25 May 2023 00:29:43 +0000 (-0400) Subject: qa: wait for file to have correct size X-Git-Tag: v18.2.1~265^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=379654c4e4af64392bafa9e3651f6b2fa217fdb7;p=ceph-ci.git qa: wait for file to have correct size Otherwise suspending the netns of the other mount will prevent it from completing a flush on the file handle or even telling the MDS that the file size has changed! Fixes: https://tracker.ceph.com/issues/61409 Signed-off-by: Patrick Donnelly (cherry picked from commit 3486dd872ffd89042ca079d680841ce3ef6f4b6f) --- diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py index 23aa2f8378d..2a557c8bfd4 100644 --- a/qa/tasks/cephfs/mount.py +++ b/qa/tasks/cephfs/mount.py @@ -935,7 +935,7 @@ class CephFSMount(object): )) p.wait() - def open_background(self, basename="background_file", write=True): + def open_background(self, basename="background_file", write=True, content="content"): """ Open a file for writing, then block such that the client will hold a capability. @@ -952,12 +952,11 @@ class CephFSMount(object): import time with open("{path}", 'w') as f: - f.write('content') + f.write("{content}") f.flush() - f.write('content2') while True: time.sleep(1) - """).format(path=path) + """).format(path=path, content=content) else: pyscript = dedent(""" import time @@ -973,7 +972,10 @@ class CephFSMount(object): # This wait would not be sufficient if the file had already # existed, but it's simple and in practice users of open_background # are not using it on existing files. - self.wait_for_visible(basename) + if write: + self.wait_for_visible(basename, size=len(content)) + else: + self.wait_for_visible(basename) return rproc @@ -1011,19 +1013,27 @@ class CephFSMount(object): if nr_links == 2: return - def wait_for_visible(self, basename="background_file", timeout=30): + def wait_for_visible(self, basename="background_file", size=None, timeout=30): i = 0 + args = ['stat'] + if size is not None: + args += ['--printf=%s'] + args += [os.path.join(self.hostfs_mntpt, basename)] while i < timeout: - r = self.client_remote.run(args=[ - 'stat', os.path.join(self.hostfs_mntpt, basename) - ], check_status=False) - if r.exitstatus == 0: - log.debug("File {0} became visible from {1} after {2}s".format( - basename, self.client_id, i)) - return - else: - time.sleep(1) - i += 1 + p = self.client_remote.run(args=args, stdout=StringIO(), check_status=False) + if p.exitstatus == 0: + if size is not None: + s = p.stdout.getvalue().strip() + if int(s) == size: + log.info(f"File {basename} became visible with size {size} from {self.client_id} after {i}s") + return + else: + log.error(f"File {basename} became visible but with size {int(s)} not {size}") + else: + log.info(f"File {basename} became visible from {self.client_id} after {i}s") + return + time.sleep(1) + i += 1 raise RuntimeError("Timed out after {0}s waiting for {1} to become visible from {2}".format( i, basename, self.client_id)) diff --git a/qa/tasks/cephfs/test_client_recovery.py b/qa/tasks/cephfs/test_client_recovery.py index f8a50ad37a9..1bd6884a9dc 100644 --- a/qa/tasks/cephfs/test_client_recovery.py +++ b/qa/tasks/cephfs/test_client_recovery.py @@ -7,7 +7,9 @@ import logging from textwrap import dedent import time import distutils.version as version +import random import re +import string import os from teuthology.orchestra import run @@ -217,8 +219,10 @@ class TestClientRecovery(CephFSTestCase): # Capability release from stale session # ===================================== if write: - cap_holder = self.mount_a.open_background() + content = ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)) + cap_holder = self.mount_a.open_background(content=content) else: + content = '' self.mount_a.run_shell(["touch", "background_file"]) self.mount_a.umount_wait() self.mount_a.mount_wait() @@ -229,7 +233,7 @@ class TestClientRecovery(CephFSTestCase): # Wait for the file to be visible from another client, indicating # that mount_a has completed its network ops - self.mount_b.wait_for_visible() + self.mount_b.wait_for_visible(size=len(content)) # Simulate client death self.mount_a.suspend_netns() @@ -260,11 +264,9 @@ class TestClientRecovery(CephFSTestCase): "Capability handover took {0}, expected approx {1}".format( cap_waited, session_timeout )) - - self.mount_a._kill_background(cap_holder) finally: - # teardown() doesn't quite handle this case cleanly, so help it out - self.mount_a.resume_netns() + self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable + self.mount_a._kill_background(cap_holder) def test_stale_read_caps(self): self._test_stale_caps(False) @@ -315,9 +317,9 @@ class TestClientRecovery(CephFSTestCase): cap_waited, session_timeout / 2.0 )) - self.mount_a._kill_background(cap_holder) finally: - self.mount_a.resume_netns() + self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable + self.mount_a._kill_background(cap_holder) def test_trim_caps(self): # Trim capability when reconnecting MDS @@ -383,7 +385,6 @@ class TestClientRecovery(CephFSTestCase): self.mount_b.check_filelock(do_flock=flockable) - # Tear down the background process self.mount_a._kill_background(lock_holder) def test_filelock_eviction(self): @@ -412,7 +413,6 @@ class TestClientRecovery(CephFSTestCase): # succeed self.wait_until_true(lambda: lock_taker.finished, timeout=10) finally: - # Tear down the background process self.mount_a._kill_background(lock_holder) # teardown() doesn't quite handle this case cleanly, so help it out