From: Yan, Zheng
Date: Tue, 24 Mar 2020 13:38:39 +0000 (+0800)
Subject: qa/cephfs: update TestClusterFull:test_barrier
X-Git-Tag: v16.1.0~2550^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1e15f02e8fbbee41ca19f175a547427534020d15;p=ceph.git

qa/cephfs: update TestClusterFull:test_barrier

The test case assumes that the client does not send cap messages when
there are no operations. The recent change that times out idle open
files breaks that assumption.

Fixes: https://tracker.ceph.com/issues/44382
Signed-off-by: "Yan, Zheng"
---

diff --git a/qa/tasks/cephfs/test_full.py b/qa/tasks/cephfs/test_full.py
index 112407de18c3..3ba05af1db7c 100644
--- a/qa/tasks/cephfs/test_full.py
+++ b/qa/tasks/cephfs/test_full.py
@@ -45,18 +45,26 @@ class FullnessTestCase(CephFSTestCase):
         epoch.
         """
 
-        # Sync up clients with initial MDS OSD map barrier
-        self.mount_a.open_no_data("foo")
-        self.mount_b.open_no_data("bar")
+        # Script that syncs the client up with the MDS OSD map barrier. The
+        # barrier should be updated by the cap flush ack message.
+        pyscript = dedent("""
+            import os
+            fd = os.open("{path}", os.O_CREAT | os.O_RDWR, 0O600)
+            os.fchmod(fd, 0O666)
+            os.fsync(fd)
+            os.close(fd)
+            """)
+
+        # Sync up client with initial MDS OSD map barrier.
+        path = os.path.join(self.mount_a.mountpoint, "foo")
+        self.mount_a.run_python(pyscript.format(path=path))
 
         # Grab mounts' initial OSD epochs: later we will check that
         # it hasn't advanced beyond this point.
-        mount_a_initial_epoch = self.mount_a.get_osd_epoch()[0]
-        mount_b_initial_epoch = self.mount_b.get_osd_epoch()[0]
+        mount_a_initial_epoch, mount_a_initial_barrier = self.mount_a.get_osd_epoch()
 
         # Freshly mounted at start of test, should be up to date with OSD map
         self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch)
-        self.assertGreaterEqual(mount_b_initial_epoch, self.initial_osd_epoch)
 
         # Set and unset a flag to cause OSD epoch to increment
         self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause")
@@ -69,43 +77,28 @@ class FullnessTestCase(CephFSTestCase):
         # Do a metadata operation on clients, witness that they end up with
         # the old OSD map from startup time (nothing has prompted client
         # to update its map)
-        self.mount_a.open_no_data("alpha")
-        self.mount_b.open_no_data("bravo1")
-
-        # Sleep long enough that if the OSD map was propagating it would
-        # have done so (this is arbitrary because we are 'waiting' for something
-        # to *not* happen).
-        time.sleep(30)
-
+        path = os.path.join(self.mount_a.mountpoint, "foo")
+        self.mount_a.run_python(pyscript.format(path=path))
         mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
         self.assertEqual(mount_a_epoch, mount_a_initial_epoch)
-        mount_b_epoch, mount_b_barrier = self.mount_b.get_osd_epoch()
-        self.assertEqual(mount_b_epoch, mount_b_initial_epoch)
+        self.assertEqual(mount_a_barrier, mount_a_initial_barrier)
 
         # Set a barrier on the MDS
         self.fs.rank_asok(["osdmap", "barrier", new_epoch.__str__()])
 
-        # Do an operation on client B, witness that it ends up with
-        # the latest OSD map from the barrier. This shouldn't generate any
-        # cap revokes to A because B was already the last one to touch
-        # a file in root.
-        self.mount_b.run_shell(["touch", "bravo2"])
-        self.mount_b.open_no_data("bravo2")
+        # Sync up client with new MDS OSD map barrier
+        path = os.path.join(self.mount_a.mountpoint, "baz")
+        self.mount_a.run_python(pyscript.format(path=path))
+        mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
+        self.assertEqual(mount_a_barrier, new_epoch)
 
         # Some time passes here because the metadata part of the operation
         # completes immediately, while the resulting OSD map update happens
         # asynchronously (it's an Objecter::_maybe_request_map) as a result
         # of seeing the new epoch barrier.
-        self.wait_until_equal(
-            lambda: self.mount_b.get_osd_epoch(),
-            (new_epoch, new_epoch),
-            30,
-            lambda x: x[0] > new_epoch or x[1] > new_epoch)
-
-        # ...and none of this should have affected the oblivious mount a,
-        # because it wasn't doing any data or metadata IO
-        mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
-        self.assertEqual(mount_a_epoch, mount_a_initial_epoch)
+        self.wait_until_true(
+            lambda: self.mount_a.get_osd_epoch()[0] >= new_epoch,
+            timeout=30)
 
     def _data_pool_name(self):
         data_pool_names = self.fs.get_data_pool_names()
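
Note: the added pyscript syncs the client's OSD map barrier by dirtying the
inode (fchmod) and then calling fsync, which forces a cap flush; per the
comment added above, the MDS's cap flush ack is what carries the updated
barrier back to the client. Below is a minimal standalone sketch of that
same pattern, not part of the patch; the mount path is a hypothetical
placeholder for a directory under a mounted CephFS client.

    import os

    path = "/mnt/cephfs/foo"  # hypothetical path under a CephFS client mount
    fd = os.open(path, os.O_CREAT | os.O_RDWR, 0o600)
    os.fchmod(fd, 0o666)   # dirty the inode's mode so the client holds dirty caps
    os.fsync(fd)           # flush caps to the MDS; the ack updates the client's barrier
    os.close(fd)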