git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Merge pull request #59071 from joscollin/wip-67386-reef
author Venky Shankar <vshankar@redhat.com>
Tue, 26 Nov 2024 16:48:26 +0000 (22:18 +0530)
committer GitHub <noreply@github.com>
Tue, 26 Nov 2024 16:48:26 +0000 (22:18 +0530)
reef: cephfs_mirror: provide metrics for last successful snapshot sync

Reviewed-by: Venky Shankar <vshankar@redhat.com>
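
For context, the metrics named in the commit title are exposed through the cephfs-mirror
daemon's admin socket command 'fs mirror peer status', which the test below polls via
mirror_daemon_command. Below is a minimal sketch of reading that output directly, outside
the test harness; the socket path, the 'cephfs@1' filesystem spec, the peer uuid placeholder
and the peer_status helper are illustrative assumptions, and only the 'state',
'failure_reason', 'last_synced_snap' and 'snaps_synced' fields are taken from the test itself:

    # illustrative sketch, not part of this diff
    import json
    import subprocess

    # admin socket of the cephfs-mirror daemon; the real name depends on the
    # cluster and the mirror client instance (assumption)
    ASOK = '/var/run/ceph/cephfs-mirror.asok'

    def peer_status(fs_spec, peer_uuid):
        # same admin socket command the test issues through mirror_daemon_command
        out = subprocess.check_output(['ceph', '--admin-daemon', ASOK,
                                       'fs', 'mirror', 'peer', 'status',
                                       fs_spec, peer_uuid])
        return json.loads(out)

    status = peer_status('cephfs@1', '<peer-uuid>')
    for dir_path, info in status.items():
        last = info.get('last_synced_snap', {})
        print(dir_path, info['state'], last.get('name'),
              info.get('snaps_synced'), info.get('failure_reason'))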
doc/cephfs/cephfs-mirroring.rst
qa/tasks/cephfs/test_mirroring.py

doc/cephfs/cephfs-mirroring.rst: simple merge
qa/tasks/cephfs/test_mirroring.py
index 1b49805a1c4c37d72790b9436ff6cf96798b1e41,181228f4e1bdc7e7cff1af6ad23e5884c4cc6c54..933ace97fa81b3d5649debc6eedf3ac3cdd3bc05
@@@ -1529,6 -1508,70 +1548,71 @@@ class TestMirroring(CephFSTestCase)
          self.mount_b.run_shell(["mkdir", "-p", "d1/d2/d3"])
          attr = str(random.randint(1, 10))
          self.mount_b.setfattr("d1/d2/d3", "ceph.mirror.dirty_snap_id", attr)
 +        log.debug('getting ceph.mirror.dirty_snap_id attribute')
          val = self.mount_b.getfattr("d1/d2/d3", "ceph.mirror.dirty_snap_id")
          self.assertEqual(attr, val, f"Mismatch for ceph.mirror.dirty_snap_id value: {attr} vs {val}")
+ 
+     def test_cephfs_mirror_remote_snap_corrupt_fails_synced_snapshot(self):
+         """
+         That manual changes to the remote .snap directory cause 'peer status' to report
+         state "failed" for a synced snapshot, and that the state returns to "idle" once
+         those changes are reverted.
+         """
+         log.debug('reconfigure client auth caps')
+         self.get_ceph_cmd_result(
+             'auth', 'caps', "client.{0}".format(self.mount_b.client_id),
+             'mds', 'allow rwps',
+             'mon', 'allow r',
+             'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+                 self.backup_fs.get_data_pool_name(),
+                 self.backup_fs.get_data_pool_name()))
+         log.debug(f'mounting filesystem {self.secondary_fs_name}')
+         self.mount_b.umount_wait()
+         self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)
+         self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+         peer_spec = "client.mirror_remote@ceph"
+         self.peer_add(self.primary_fs_name, self.primary_fs_id, peer_spec, self.secondary_fs_name)
+         dir_name = 'd0'
+         self.mount_a.run_shell(['mkdir', dir_name])
+         self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{dir_name}')
+         # take a snapshot
+         snap_name = "snap_a"
+         expected_snap_count = 1
+         self.mount_a.run_shell(['mkdir', f'{dir_name}/.snap/{snap_name}'])
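+         # give the mirror daemon time to pick up and sync the new snapshot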
+         time.sleep(30)
+         # confirm snapshot synced and status 'idle'
+         self.check_peer_status_idle(self.primary_fs_name, self.primary_fs_id,
+                                     peer_spec, f'/{dir_name}', snap_name, expected_snap_count)
+         remote_snap_name = 'snap_b'
+         remote_snap_path = f'{dir_name}/.snap/{remote_snap_name}'
+         failure_reason = f"snapshot '{remote_snap_name}' has invalid metadata"
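+         # 'peer status' keys directories by their absolute path, so switch to the slash-prefixed form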
+         dir_name = f'/{dir_name}'
+         # create a snapshot directly on the remote fs (one the source never synced) and check status 'failed'
+         self.mount_b.run_shell(['sudo', 'mkdir', remote_snap_path], omit_sudo=False)
+         peer_uuid = self.get_peer_uuid(peer_spec)
+         with safe_while(sleep=1, tries=60, action=f'wait for failed status: {peer_spec}') as proceed:
+             while proceed():
+                 res = self.mirror_daemon_command(f'peer status for fs: {self.primary_fs_name}',
+                                                  'fs', 'mirror', 'peer', 'status',
+                                                  f'{self.primary_fs_name}@{self.primary_fs_id}', peer_uuid)
+                 if (res[dir_name]['state'] == 'failed' and
+                         res[dir_name].get('failure_reason') == failure_reason and
+                         res[dir_name]['last_synced_snap']['name'] == snap_name and
+                         res[dir_name]['snaps_synced'] == expected_snap_count):
+                     break
+         # remove the directory in the remote fs and check status restores to 'idle'
+         self.mount_b.run_shell(['sudo', 'rmdir', remote_snap_path], omit_sudo=False)
+         with safe_while(sleep=1, tries=60, action=f'wait for idle status: {peer_spec}') as proceed:
+             while proceed():
+                 res = self.mirror_daemon_command(f'peer status for fs: {self.primary_fs_name}',
+                                                  'fs', 'mirror', 'peer', 'status',
+                                                  f'{self.primary_fs_name}@{self.primary_fs_id}', peer_uuid)
+                 if (res[dir_name]['state'] == 'idle' and
+                         'failure_reason' not in res[dir_name] and
+                         res[dir_name]['last_synced_snap']['name'] == snap_name and
+                         res[dir_name]['snaps_synced'] == expected_snap_count):
+                     break
+         self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)