def test_cephfs_mirror_multithread_snapshot_only_dirs_sync(self):
    """
    Verify that a snapshot holding only directories syncs and does not hang.

    A snapshot that contains directories but no files used to hang when it
    was queued for syncing in the following scenario:
        1. Configure /d0 and /d1 for mirroring.
        2. Create around 10k files in /d0.
        3. Create a single directory, e.g. /d1/dir0 (or nothing at all).
        4. Snapshot /d0 and wait for its status to change to 'syncing'.
        5. Snapshot /d1.

    The /d1 snapshot would then be stuck in 'syncing' forever.
    See tracker https://tracker.ceph.com/issues/75804 for more details.
    """
    self.setup_mount_b(mds_perm='rw')
    remote_peer = "client.mirror_remote@ceph"
    snapshot = "snap0"
    fs_name = self.primary_fs_name
    fs_id = self.primary_fs_id

    # Two directories to snapshot: d0 gets many files, d1 only a sub-directory.
    for directory in ("d0", "d1"):
        self.mount_a.run_shell(["mkdir", directory])

    self.mount_a.create_n_files("d0/myfile", 10000)
    self.mount_a.run_shell(["mkdir", "d1/dir0"])

    log.debug('enabling mirroring')
    self.enable_mirroring(fs_name, fs_id)

    # The hang is hit only when the snapshot's syncm object waits for
    # datasync threads at syncm_q; it is not hit when the syncm object waits
    # for files at its own data queue. Two things steer the test that way:
    #   1. Many files (10k) in /d0. This leaves enough time for /d1's crawl
    #      to finish before the datasync threads pick up /d1. If the crawl
    #      were still running when threads became available, /d1 would wait
    #      for files at syncm's data queue instead.
    #   2. distribute_datasync_threads disabled. This guarantees /d1 is not
    #      picked up for syncing before its crawl is finished, because all
    #      threads are busy syncing /d0's 10k files.
    log.debug('disabling cephfs_mirror_distribute_datasync_threads config')
    self.config_set('client.mirror',
                    'cephfs_mirror_distribute_datasync_threads', 'false')

    log.debug('adding directory paths')
    for mirrored_path in ('/d0', '/d1'):
        self.add_directory(fs_name, fs_id, mirrored_path)
    self.peer_add(fs_name, fs_id, remote_peer, self.secondary_fs_name)

    # Snapshot /d0 first so that its sync is already running ...
    log.debug('take /d0 snapshot first')
    self.mount_a.run_shell(["mkdir", f"d0/.snap/{snapshot}"])
    log.debug('checking /d0/.snap/snap0 in progress')
    self.check_peer_snap_in_progress(fs_name, fs_id, remote_peer,
                                     '/d0', snapshot)

    # ... and only then snapshot /d1, while /d0 is still in progress.
    self.mount_a.run_shell(["mkdir", f"d1/.snap/{snapshot}"])

    # Both snapshots must complete: first d0, then d1.
    for directory in ('d0', 'd1'):
        self.check_peer_status(fs_name, fs_id, remote_peer,
                               f'/{directory}', snapshot, 1)
        self.verify_snapshot(directory, snapshot)

    self.config_set('client.mirror',
                    'cephfs_mirror_distribute_datasync_threads', 'true')
    self.disable_mirroring(fs_name, fs_id)