From: John Spray
Date: Mon, 13 Feb 2017 17:17:29 +0000 (+0000)
Subject: qa: add TestStrays.test_purge_queue_op_rate
X-Git-Tag: v12.0.1~140^2~4
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6cf9c2956cb25eeb3c9e5c3fd59650f626314b41;p=ceph-ci.git

qa: add TestStrays.test_purge_queue_op_rate

For ensuring that the PurgeQueue code is not generating too many
extra IOs.

Signed-off-by: John Spray
---

diff --git a/qa/tasks/cephfs/test_strays.py b/qa/tasks/cephfs/test_strays.py
index 4484f8d8cee..500a0aa84db 100644
--- a/qa/tasks/cephfs/test_strays.py
+++ b/qa/tasks/cephfs/test_strays.py
@@ -893,4 +893,73 @@ class TestStrays(CephFSTestCase):
         self.mds_cluster.mds_fail()
         self.fs.rados(["rm", "500.00000000"])
         self.mds_cluster.mds_restart()
-        self.fs.wait_for_daemons()
\ No newline at end of file
+        self.fs.wait_for_daemons()
+
+    def test_purge_queue_op_rate(self):
+        """
+        A busy purge queue is meant to aggregate operations sufficiently
+        that our RADOS ops to the metadata pool are not O(files). Check
+        that that is so.
+        :return:
+        """
+
+        # For low rates of deletion, the rate of metadata ops actually
+        # will be o(files), so to see the desired behaviour we have to give
+        # the system a significant quantity, i.e. an order of magnitude
+        # more than the number of files it will purge at one time.
+
+        max_purge_files = 2
+
+        self.set_conf('mds', 'mds_max_purge_files', "%d" % max_purge_files)
+        self.fs.mds_fail_restart()
+        self.fs.wait_for_daemons()
+
+        phase_1_files = 256
+        phase_2_files = 512
+
+        self.mount_a.run_shell(["mkdir", "phase1"])
+        self.mount_a.create_n_files("phase1/file", phase_1_files)
+
+        self.mount_a.run_shell(["mkdir", "phase2"])
+        self.mount_a.create_n_files("phase2/file", phase_2_files)
+
+        def unlink_and_count_ops(path, expected_deletions):
+            initial_ops = self.get_stat("objecter", "op")
+            initial_pq_executed = self.get_stat("purge_queue", "pq_executed")
+
+            self.mount_a.run_shell(["rm", "-rf", path])
+
+            self._wait_for_counter(
+                "purge_queue", "pq_executed", initial_pq_executed + expected_deletions
+            )
+
+            final_ops = self.get_stat("objecter", "op")
+
+            # Calculation of the *overhead* operations, i.e. do not include
+            # the operations where we actually delete files.
+            return final_ops - initial_ops - expected_deletions
+
+        self.fs.mds_asok(['flush', 'journal'])
+        phase1_ops = unlink_and_count_ops("phase1/", phase_1_files + 1)
+
+        self.fs.mds_asok(['flush', 'journal'])
+        phase2_ops = unlink_and_count_ops("phase2/", phase_2_files + 1)
+
+        log.info("Phase 1: {0}".format(phase1_ops))
+        log.info("Phase 2: {0}".format(phase2_ops))
+
+        # The success criterion is that deleting double the number
+        # of files doesn't generate double the number of overhead ops
+        # -- this comparison is a rough approximation of that rule.
+        self.assertTrue(phase2_ops < phase1_ops * 1.25)
+
+        # Finally, check that our activity did include properly quiescing
+        # the queue (i.e. call to Journaler::write_head in the right place),
+        # by restarting the MDS and checking that it doesn't try re-executing
+        # any of the work we did.
+        self.fs.mds_asok(['flush', 'journal'])  # flush to ensure no strays
+                                                # hanging around
+        self.fs.mds_fail_restart()
+        self.fs.wait_for_daemons()
+        time.sleep(10)
+        self.assertEqual(self.get_stat("purge_queue", "pq_executed"), 0)
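
The snippet below is not part of the patch; it is a minimal standalone sketch of the
counter-delta pattern that unlink_and_count_ops() above relies on: sample an op
counter before and after a workload, then subtract the operations the workload was
expected to issue, leaving only the overhead. The names overhead_ops, read_counter
and run_workload are hypothetical and exist only for illustration.

# Hypothetical sketch (not part of the patch): the counter-delta pattern
# behind unlink_and_count_ops().

def overhead_ops(read_counter, run_workload, expected_ops):
    """Return how many ops beyond `expected_ops` the workload generated."""
    before = read_counter()
    run_workload()
    after = read_counter()
    return after - before - expected_ops


if __name__ == "__main__":
    # Toy stand-in for the objecter "op" counter and the rm -rf workload.
    counter = {"op": 0}

    def fake_workload():
        counter["op"] += 260  # e.g. 257 expected deletions plus 3 ops of overhead

    print(overhead_ops(lambda: counter["op"], fake_workload, 257))  # prints 3

In the test itself the "after" sample is only taken once the purge_queue pq_executed
counter has advanced by the expected number of deletions, since the MDS executes the
purges asynchronously after the rm -rf returns.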