From c89cd0e0fe5c4e159beed9b03a6c621f07143f24 Mon Sep 17 00:00:00 2001
From: Rishabh Dave
Date: Sat, 6 Jan 2024 20:12:31 +0530
Subject: [PATCH] qa/cephfs: add tests for config option pause_purging

Setting MGR config option mgr/volumes/pause_purging to true halts all
ongoing purges and allows no new purging to begin until this option is
changed to false. Add tests for this.

Signed-off-by: Rishabh Dave
(cherry picked from commit f38fcbc6109494e23e4948d794f59c927a9303ff)

Conflicts:
	qa/tasks/cephfs/test_volumes.py
	- The first conflict occurred because the import of safe_while is
	  missing in the Reef branch compared to the main branch. Along
	  with resolving this conflict, safe_while has been imported here
	  since it is used by the tests.
	- The second conflict occurred due to the absence of some test
	  methods right before the point where TestPausePurging was to be
	  added.
	- The third conflict occurred because the entire contextutil
	  module was imported instead of just safe_while, and only
	  CommandFailedError was imported from teuthology.exceptions,
	  while this commit imports MaxWhileTries too.
---
 .../fs/volumes/tasks/volumes/test/async.yaml |   5 +
 qa/tasks/cephfs/test_volumes.py              | 189 +++++++++++++++++-
 2 files changed, 191 insertions(+), 3 deletions(-)
 create mode 100644 qa/suites/fs/volumes/tasks/volumes/test/async.yaml

diff --git a/qa/suites/fs/volumes/tasks/volumes/test/async.yaml b/qa/suites/fs/volumes/tasks/volumes/test/async.yaml
new file mode 100644
index 00000000000..f82e179bf14
--- /dev/null
+++ b/qa/suites/fs/volumes/tasks/volumes/test/async.yaml
@@ -0,0 +1,5 @@
+tasks:
+  - cephfs_test_runner:
+      fail_on_skip: false
+      modules:
+        - tasks.cephfs.test_volumes.TestPausePurging
diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py
index 7d334d933d4..6dbc9dbaebc 100644
--- a/qa/tasks/cephfs/test_volumes.py
+++ b/qa/tasks/cephfs/test_volumes.py
@@ -13,8 +13,8 @@ from io import StringIO
 
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
 from tasks.cephfs.fuse_mount import FuseMount
-from teuthology.exceptions import CommandFailedError
-from teuthology import contextutil
+from teuthology.contextutil import safe_while
+from teuthology.exceptions import CommandFailedError, MaxWhileTries
 
 log = logging.getLogger(__name__)
@@ -4257,6 +4257,189 @@ class TestSubvolumes(TestVolumesHelper):
         # verify trash dir is clean.
         self._wait_for_trash_empty()
 
+
+class TestPausePurging(TestVolumesHelper):
+    '''
+    Tests related to config "mgr/volumes/pause_purging".
+    '''
+
+    CONF_OPT = 'mgr/volumes/pause_purging'
+
+    def tearDown(self):
+        # every test changes the value of this config option as per its
+        # need. ensure that this config option's default value is restored
+        # during tearDown() so that there's zero chance that it interferes
+        # with the next test.
+        self.config_set('mgr', self.CONF_OPT, False)
+
+        # ensure the purge threads have no jobs left from the previous test
+        # so that the next test doesn't face unnecessary complications.
+        self._wait_for_trash_empty()
+
+        super().tearDown()
+
+    def _get_sv_path(self, v, sv):
+        sv_path = self.get_ceph_cmd_stdout(f'fs subvolume getpath {v} {sv}')
+        sv_path = sv_path.strip()
+        # delete the slash at the beginning of the path
+        sv_path = sv_path[1:]
+
+        sv_path = os.path.join(self.mount_a.mountpoint, sv_path)
+        return sv_path
+
+    def _assert_sv_is_absent_in_trash(self, sv, sv_path, sv_files):
+        uuid = self.mount_a.get_shell_stdout('sudo ls volumes/_deleting').\
+            strip()
+
+        trash_sv_path = sv_path.replace('_nogroup', f'_deleting/{uuid}')
+        trash_sv_path = trash_sv_path.replace(sv, '')
+
+        try:
+            sv_files_new = self.mount_a.get_shell_stdout(
+                f'sudo ls {trash_sv_path}')
+        except CommandFailedError as cfe:
+            # in this case the dir for the subvol, including the files in
+            # it, has been deleted
+            self.assertEqual(cfe.exitstatus, 2)
+            return
+
+        # in this case the dir for the subvol hasn't been deleted yet (but
+        # the files inside it have been).
+        for filename in sv_files:
+            self.assertNotIn(filename, sv_files_new)
+
+    def _assert_trashed_sv_is_unpurged(self, sv, sv_path, sv_files):
+        uuid = self.mount_a.get_shell_stdout('sudo ls volumes/_deleting').\
+            strip()
+
+        trash_sv_path = sv_path.replace('_nogroup', f'_deleting/{uuid}')
+        trash_sv_path = trash_sv_path.replace(sv, '')
+        sv_files_new = self.mount_a.get_shell_stdout(
+            f'sudo ls {trash_sv_path}').strip()
+
+        for filename in sv_files:
+            self.assertIn(filename, sv_files_new)
+
+    def test_when_paused_subvol_is_trashed_but_stays_unpurged(self):
+        '''
+        Test that when the MGR config option mgr/volumes/pause_purging is
+        set to true, running "ceph fs subvolume rm" will move the subvolume
+        to trash but not purge it, that is, not delete the subvolume from
+        the trash.
+        '''
+        v = self.volname
+        sv = 'sv1'
+
+        self.run_ceph_cmd(f'fs subvolume create {v} {sv} --mode=777')
+
+        self.config_set('mgr', self.CONF_OPT, True)
+        sv_path = self._get_sv_path(v, sv)
+        self._do_subvolume_io(sv, number_of_files=10)
+        sv_files = self.mount_a.get_shell_stdout(f'sudo ls {sv_path}').\
+            strip().split('\n')
+
+        self.run_ceph_cmd(f'fs subvolume rm {v} {sv}')
+        # wait for a bit to ensure that the trashed subvolume is never
+        # picked up by the purge threads.
+        with safe_while(tries=1, sleep=7) as proceed:
+            try:
+                while proceed():
+                    self._assert_trashed_sv_is_unpurged(sv, sv_path,
+                                                        sv_files)
+            except MaxWhileTries:
+                pass
+
+    def test_on_resuming_unpurged_subvol_is_purged(self):
+        '''
+        Test that when the MGR config option mgr/volumes/pause_purging is
+        set to false, a trashed but unpurged subvolume is purged (fully).
+        '''
+        v = self.volname
+        sv = 'sv1'
+
+        self.run_ceph_cmd(f'fs subvolume create {v} {sv} --mode=777')
+        self.config_set('mgr', self.CONF_OPT, True)
+        sv_path = self._get_sv_path(v, sv)
+        self._do_subvolume_io(sv, number_of_files=10)
+        sv_files = self.mount_a.get_shell_stdout(f'sudo ls {sv_path}').\
+            strip().split('\n')
+
+        self.run_ceph_cmd(f'fs subvolume rm {v} {sv}')
+        # wait for a bit to ensure that the trashed subvolume is never
+        # picked up by the purge threads.
+        with safe_while(tries=1, sleep=7) as proceed:
+            try:
+                while proceed():
+                    self._assert_trashed_sv_is_unpurged(sv, sv_path,
+                                                        sv_files)
+            except MaxWhileTries:
+                pass
+
+        # XXX actual test here: test that the unpurged subvol is purged
+        self.config_set('mgr', self.CONF_OPT, False)
+        self._wait_for_trash_empty()
+
+    def _get_trashed_sv_path(self, sv, sv_path):
+        uuid = self.mount_a.get_shell_stdout('sudo ls volumes/_deleting').\
+            strip()
+
+        trashed_sv_path = sv_path.replace('_nogroup', f'_deleting/{uuid}')
+        trashed_sv_path = trashed_sv_path.replace(sv, '')
+        return trashed_sv_path
+
+    def _get_num_of_files_in_trashed_sv(self, trashed_sv_path):
+        trashed_sv_files = self.mount_a.get_shell_stdout(
+            f'sudo ls {trashed_sv_path}').strip().split('\n')
+        return len(trashed_sv_files)
+
+    def _assert_trashed_sv_has_num_of_files(self, trashed_sv_path,
+                                            num_of_files):
+        sv_files = self.mount_a.get_shell_stdout(
+            f'sudo ls {trashed_sv_path}').strip().split('\n')
+
+        self.assertEqual(len(sv_files), num_of_files)
+
+    def test_pausing_halts_ongoing_purge(self):
+        '''
+        Test that when the MGR config option mgr/volumes/pause_purging is
+        set to true, running "ceph fs subvolume rm" will move the subvolume
+        to trash but the (asynchronous) purge of the subvolume won't happen
+        until this config option is explicitly set to false.
+        '''
+        v = self.volname
+        sv = 'sv1'
+
+        self.run_ceph_cmd(f'fs subvolume create {v} {sv} --mode=777')
+
+        sv_path = self._get_sv_path(v, sv)
+        # add more files for this test since in a few runs it failed due
+        # to a race condition
+        self._do_subvolume_io(sv, number_of_files=200)
+
+        self.run_ceph_cmd(f'fs subvolume rm {v} {sv}')
+        # XXX actual test here: test that purging halts
+        self.config_set('mgr', self.CONF_OPT, True)
+        trashed_sv_path = self._get_trashed_sv_path(sv, sv_path)
+        NUM_OF_FILES = self._get_num_of_files_in_trashed_sv(trashed_sv_path)
+        time.sleep(2)
+        self._assert_trashed_sv_has_num_of_files(trashed_sv_path,
+                                                 NUM_OF_FILES)
+
+    def test_on_resuming_partly_purged_subvol_purges_fully(self):
+        '''
+        Test that when the MGR config option mgr/volumes/pause_purging is
+        changed to false, the async purging of a subvolume will resume and
+        also finish, causing the trash to become empty.
+        '''
+        v = self.volname
+        sv = 'sv1'
+
+        self.run_ceph_cmd(f'fs subvolume create {v} {sv} --mode=777')
+        self._do_subvolume_io(sv, number_of_files=100)
+
+        self.run_ceph_cmd(f'fs subvolume rm {v} {sv}')
+        self.config_set('mgr', self.CONF_OPT, True)
+        time.sleep(2)
+
+        # XXX actual test here: test that purging is resumed and finished
+        self.config_set('mgr', self.CONF_OPT, False)
+        self._wait_for_trash_empty()
+
+
 class TestSubvolumeGroupSnapshots(TestVolumesHelper):
     """Tests for FS subvolume group snapshot operations."""
     @unittest.skip("skipping subvolumegroup snapshot tests")
@@ -8265,7 +8448,7 @@ class TestMisc(TestVolumesHelper):
         clone_path = f'./volumes/_nogroup/{clone}'
         self.mount_a.run_shell(['sudo', 'rm', '-rf', clone_path], omit_sudo=False)
 
-        with contextutil.safe_while(sleep=5, tries=6) as proceed:
+        with safe_while(sleep=5, tries=6) as proceed:
             while proceed():
                 try:
                     result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot))
-- 
2.39.5
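
Note: the behaviour under test can also be reproduced by hand against a
test cluster. A minimal sketch, assuming a running cluster with a CephFS
volume named "a" (the volume and subvolume names here are made up for
illustration; the commands are the same ceph CLI invocations the tests
wrap):

    import subprocess

    def ceph(*args):
        # thin wrapper over the `ceph` CLI; assumes it is in PATH and the
        # keyring permits these commands
        return subprocess.run(('ceph',) + args, check=True,
                              capture_output=True, text=True).stdout

    ceph('fs', 'subvolume', 'create', 'a', 'sv1', '--mode=777')
    # pause purging, then delete: the subvolume is moved into the volume's
    # trash (volumes/_deleting) but its data is not removed from disk
    ceph('config', 'set', 'mgr', 'mgr/volumes/pause_purging', 'true')
    ceph('fs', 'subvolume', 'rm', 'a', 'sv1')
    # flip the option back: the purge threads resume and the trash
    # eventually becomes empty
    ceph('config', 'set', 'mgr', 'mgr/volumes/pause_purging', 'false')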
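Note on the safe_while usage: these tests invert the usual polling idiom.
Instead of retrying until a condition becomes true, they assert that a
condition keeps holding for the whole wait window and swallow the
MaxWhileTries raised when the window runs out. A standalone sketch of
that idiom (an illustration only, not teuthology's implementation;
assert_holds and check_fn are hypothetical stand-ins for safe_while and
the _assert_* helpers):

    import time

    def assert_holds(check_fn, tries=1, sleep=7):
        # call check_fn once per try, sleeping in between; if it ever
        # raises AssertionError, the condition broke early and the
        # failure propagates. if every try passes, the condition held
        # for roughly tries * sleep seconds -- the outcome the tests
        # above express with safe_while(tries=1, sleep=7) plus
        # `except MaxWhileTries: pass`.
        for _ in range(tries):
            check_fn()
            time.sleep(sleep)

    assert_holds(lambda: None)  # a condition that trivially keeps holding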