From: Jos Collin
Date: Fri, 29 Mar 2024 10:20:02 +0000 (+0530)
Subject: qa: add test_cephfs_mirror_remote_snap_corrupt_fails_synced_snapshot
X-Git-Tag: v18.2.5~254^2~4
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1b3562be5ef45e35992d561d39a88893591a4bb3;p=ceph.git

qa: add test_cephfs_mirror_remote_snap_corrupt_fails_synced_snapshot

Fixes: https://tracker.ceph.com/issues/65226
Signed-off-by: Jos Collin
(cherry picked from commit db65867f77652b9c7f5df44d9bff73e7a45b32a1)
---

diff --git a/qa/tasks/cephfs/test_mirroring.py b/qa/tasks/cephfs/test_mirroring.py
index 689fed96226..1430b2cac20 100644
--- a/qa/tasks/cephfs/test_mirroring.py
+++ b/qa/tasks/cephfs/test_mirroring.py
@@ -204,6 +204,21 @@ class TestMirroring(CephFSTestCase):
         self.assertTrue(res[dir_name]['last_synced_snap']['name'] == expected_snap_name)
         self.assertTrue(res[dir_name]['snaps_synced'] == expected_snap_count)
 
+    def check_peer_status_idle(self, fs_name, fs_id, peer_spec, dir_name, expected_snap_name,
+                               expected_snap_count):
+        """
+        Assert that 'peer status' reports dir_name in state "idle" with the expected
+        last synced snapshot name and synced snapshot count.
+        """
+        peer_uuid = self.get_peer_uuid(peer_spec)
+        res = self.mirror_daemon_command(f'peer status for fs: {fs_name}',
+                                         'fs', 'mirror', 'peer', 'status',
+                                         f'{fs_name}@{fs_id}', peer_uuid)
+        self.assertTrue(dir_name in res)
+        self.assertTrue('idle' == res[dir_name]['state'])
+        self.assertTrue(expected_snap_name == res[dir_name]['last_synced_snap']['name'])
+        self.assertTrue(expected_snap_count == res[dir_name]['snaps_synced'])
+
     def check_peer_status_deleted_snap(self, fs_name, fs_id, peer_spec, dir_name,
                                        expected_delete_count):
         peer_uuid = self.get_peer_uuid(peer_spec)
@@ -1499,3 +1514,71 @@ class TestMirroring(CephFSTestCase):
         self.mount_b.setfattr("d1/d2/d3", "ceph.mirror.dirty_snap_id", attr)
         val = self.mount_b.getfattr("d1/d2/d3", "ceph.mirror.dirty_snap_id")
         self.assertEqual(attr, val, f"Mismatch for ceph.mirror.dirty_snap_id value: {attr} vs {val}")
+
+    def test_cephfs_mirror_remote_snap_corrupt_fails_synced_snapshot(self):
+        """
+        That making manual changes to the remote .snap directory shows 'peer status' state: "failed"
+        for a synced snapshot and then restores to "idle" when those changes are reverted.
+        """
+        log.debug('reconfigure client auth caps')
+        self.get_ceph_cmd_result(
+            'auth', 'caps', "client.{0}".format(self.mount_b.client_id),
+            'mds', 'allow rwps',
+            'mon', 'allow r',
+            'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+                self.backup_fs.get_data_pool_name(),
+                self.backup_fs.get_data_pool_name()))
+        log.debug(f'mounting filesystem {self.secondary_fs_name}')
+        self.mount_b.umount_wait()
+        self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)
+
+        self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+        peer_spec = "client.mirror_remote@ceph"
+        self.peer_add(self.primary_fs_name, self.primary_fs_id, peer_spec, self.secondary_fs_name)
+        dir_name = 'd0'
+        self.mount_a.run_shell(['mkdir', dir_name])
+        self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{dir_name}')
+
+        # take a snapshot
+        snap_name = "snap_a"
+        expected_snap_count = 1
+        self.mount_a.run_shell(['mkdir', f'{dir_name}/.snap/{snap_name}'])
+
+        time.sleep(30)
+        # confirm snapshot synced and status 'idle'
+        self.check_peer_status_idle(self.primary_fs_name, self.primary_fs_id,
+                                    peer_spec, f'/{dir_name}', snap_name, expected_snap_count)
+
+        remote_snap_name = 'snap_b'
+        remote_snap_path = f'{dir_name}/.snap/{remote_snap_name}'
+        failure_reason = f"snapshot '{remote_snap_name}' has invalid metadata"
+        # from here on dir_name is the '/'-prefixed path used to index the peer status output
+        dir_name = f'/{dir_name}'
+
+        # create a directory in the remote fs and check status 'failed'
+        self.mount_b.run_shell(['sudo', 'mkdir', remote_snap_path], omit_sudo=False)
+        peer_uuid = self.get_peer_uuid(peer_spec)
+        with safe_while(sleep=1, tries=60, action=f'wait for failed status: {peer_spec}') as proceed:
+            while proceed():
+                res = self.mirror_daemon_command(f'peer status for fs: {self.primary_fs_name}',
+                                                 'fs', 'mirror', 'peer', 'status',
+                                                 f'{self.primary_fs_name}@{self.primary_fs_id}', peer_uuid)
+                if('failed' == res[dir_name]['state'] and \
+                   failure_reason == res.get(dir_name, {}).get('failure_reason', {}) and \
+                   snap_name == res[dir_name]['last_synced_snap']['name'] and \
+                   expected_snap_count == res[dir_name]['snaps_synced']):
+                    break
+        # remove the directory in the remote fs and check status restores to 'idle'
+        self.mount_b.run_shell(['sudo', 'rmdir', remote_snap_path], omit_sudo=False)
+        with safe_while(sleep=1, tries=60, action=f'wait for idle status: {peer_spec}') as proceed:
+            while proceed():
+                res = self.mirror_daemon_command(f'peer status for fs: {self.primary_fs_name}',
+                                                 'fs', 'mirror', 'peer', 'status',
+                                                 f'{self.primary_fs_name}@{self.primary_fs_id}', peer_uuid)
+                # failure_reason is reported inside the directory's entry (see the 'failed'
+                # check above), so it must be looked up there, not at the top level of res
+                if('idle' == res[dir_name]['state'] and 'failure_reason' not in res[dir_name] and \
+                   snap_name == res[dir_name]['last_synced_snap']['name'] and \
+                   expected_snap_count == res[dir_name]['snaps_synced']):
+                    break
+        self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)