git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa: wait for file to have correct size 52744/head
author Patrick Donnelly <pdonnell@redhat.com>
Thu, 25 May 2023 00:29:43 +0000 (20:29 -0400)
committer Patrick Donnelly <pdonnell@redhat.com>
Wed, 2 Aug 2023 01:02:56 +0000 (21:02 -0400)
Otherwise suspending the netns of the other mount will prevent it from
completing a flush on the file handle or even telling the MDS that the
file size has changed!

Fixes: https://tracker.ceph.com/issues/61409
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
(cherry picked from commit 3486dd872ffd89042ca079d680841ce3ef6f4b6f)

qa/tasks/cephfs/mount.py
qa/tasks/cephfs/test_client_recovery.py

index 9e733512418742c90bd98af2974ed2ce5c208b0e..72dc508b4052daa08899ad373771a23e578fe728 100644 (file)
@@ -811,7 +811,7 @@ class CephFSMount(object):
         ))
         p.wait()
 
-    def open_background(self, basename="background_file", write=True):
+    def open_background(self, basename="background_file", write=True, content="content"):
         """
         Open a file for writing, then block such that the client
         will hold a capability.
@@ -828,12 +828,11 @@ class CephFSMount(object):
                 import time
 
                 with open("{path}", 'w') as f:
-                    f.write('content')
+                    f.write("{content}")
                     f.flush()
-                    f.write('content2')
                     while True:
                         time.sleep(1)
-                """).format(path=path)
+                """).format(path=path, content=content)
         else:
             pyscript = dedent("""
                 import time
@@ -849,7 +848,10 @@ class CephFSMount(object):
         # This wait would not be sufficient if the file had already
         # existed, but it's simple and in practice users of open_background
         # are not using it on existing files.
-        self.wait_for_visible(basename)
+        if write:
+            self.wait_for_visible(basename, size=len(content))
+        else:
+            self.wait_for_visible(basename)
 
         return rproc
 
@@ -887,19 +889,27 @@ class CephFSMount(object):
                 if nr_links == 2:
                     return
 
-    def wait_for_visible(self, basename="background_file", timeout=30):
+    def wait_for_visible(self, basename="background_file", size=None, timeout=30):
         i = 0
+        args = ['stat']
+        if size is not None:
+            args += ['--printf=%s']
+        args += [os.path.join(self.hostfs_mntpt, basename)]
         while i < timeout:
-            r = self.client_remote.run(args=[
-                'stat', os.path.join(self.hostfs_mntpt, basename)
-            ], check_status=False)
-            if r.exitstatus == 0:
-                log.debug("File {0} became visible from {1} after {2}s".format(
-                    basename, self.client_id, i))
-                return
-            else:
-                time.sleep(1)
-                i += 1
+            p = self.client_remote.run(args=args, stdout=StringIO(), check_status=False)
+            if p.exitstatus == 0:
+                if size is not None:
+                    s = p.stdout.getvalue().strip()
+                    if int(s) == size:
+                        log.info(f"File {basename} became visible with size {size} from {self.client_id} after {i}s")
+                        return
+                    else:
+                        log.error(f"File {basename} became visible but with size {int(s)} not {size}")
+                else:
+                    log.info(f"File {basename} became visible from {self.client_id} after {i}s")
+                    return
+            time.sleep(1)
+            i += 1
 
         raise RuntimeError("Timed out after {0}s waiting for {1} to become visible from {2}".format(
             i, basename, self.client_id))
index 24726b369f9fd5ea4ccfcc26395d291a899feda3..db99cbb6ebbd47a153e1bae99b3a0ab945150056 100644 (file)
@@ -7,7 +7,9 @@ import logging
 from textwrap import dedent
 import time
 import distutils.version as version
+import random
 import re
+import string
 import os
 
 from teuthology.orchestra import run
@@ -219,8 +221,10 @@ class TestClientRecovery(CephFSTestCase):
         # Capability release from stale session
         # =====================================
         if write:
-            cap_holder = self.mount_a.open_background()
+            content = ''.join(random.choices(string.ascii_uppercase + string.digits, k=16))
+            cap_holder = self.mount_a.open_background(content=content)
         else:
+            content = ''
             self.mount_a.run_shell(["touch", "background_file"])
             self.mount_a.umount_wait()
             self.mount_a.mount_wait()
@@ -231,7 +235,7 @@ class TestClientRecovery(CephFSTestCase):
 
         # Wait for the file to be visible from another client, indicating
         # that mount_a has completed its network ops
-        self.mount_b.wait_for_visible()
+        self.mount_b.wait_for_visible(size=len(content))
 
         # Simulate client death
         self.mount_a.suspend_netns()
@@ -262,11 +266,9 @@ class TestClientRecovery(CephFSTestCase):
                             "Capability handover took {0}, expected approx {1}".format(
                                 cap_waited, session_timeout
                             ))
-
-            self.mount_a._kill_background(cap_holder)
         finally:
-            # teardown() doesn't quite handle this case cleanly, so help it out
-            self.mount_a.resume_netns()
+            self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable
+        self.mount_a._kill_background(cap_holder)
 
     def test_stale_read_caps(self):
         self._test_stale_caps(False)
@@ -317,9 +319,9 @@ class TestClientRecovery(CephFSTestCase):
                                 cap_waited, session_timeout / 2.0
                             ))
 
-            self.mount_a._kill_background(cap_holder)
         finally:
-            self.mount_a.resume_netns()
+            self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable
+        self.mount_a._kill_background(cap_holder)
 
     def test_trim_caps(self):
         # Trim capability when reconnecting MDS
@@ -385,7 +387,6 @@ class TestClientRecovery(CephFSTestCase):
 
         self.mount_b.check_filelock(do_flock=flockable)
 
-        # Tear down the background process
         self.mount_a._kill_background(lock_holder)
 
     def test_filelock_eviction(self):
@@ -414,7 +415,6 @@ class TestClientRecovery(CephFSTestCase):
             # succeed
             self.wait_until_true(lambda: lock_taker.finished, timeout=10)
         finally:
-            # Tear down the background process
             self.mount_a._kill_background(lock_holder)
 
             # teardown() doesn't quite handle this case cleanly, so help it out