From 7544a95646bb0bb6c0298abd0c9716403553949c Mon Sep 17 00:00:00 2001 From: Rishabh Dave Date: Wed, 10 Sep 2025 13:29:00 +0530 Subject: [PATCH] mgr/volumes: make clone state transition to fail when UUID dir is missing... for the source subvolume. Also add tests for the same. Fixes: https://tracker.ceph.com/issues/72957 Signed-off-by: Rishabh Dave --- qa/tasks/cephfs/test_volumes.py | 23 +++++++++++++++++++ src/pybind/mgr/volumes/fs/async_cloner.py | 10 ++++---- .../mgr/volumes/fs/operations/subvolume.py | 10 ++++++-- .../mgr/volumes/fs/operations/template.py | 1 + .../fs/operations/versions/subvolume_v2.py | 11 +++++++-- 5 files changed, 46 insertions(+), 9 deletions(-) diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py index ad7636af75e..3bbd58ec99f 100644 --- a/qa/tasks/cephfs/test_volumes.py +++ b/qa/tasks/cephfs/test_volumes.py @@ -10839,3 +10839,26 @@ class TestCorruptedSubvolumes(TestVolumesHelper): # cleanup self.run_ceph_cmd(f'fs subvolume rm {self.volname} {sv1} --force') + + def test_clone_when_src_subvol_has_missing_UUID_dir(self): + sv1 = 'sv1' + ss1 = 'ss1' + c1 = 'c1' + + self.run_ceph_cmd(f'fs subvolume create {self.volname} {sv1}') + sv_path = self.get_ceph_cmd_stdout('fs subvolume getpath ' + f'{self.volname} {sv1}').strip() + sv_path = sv_path[1:] + self.run_ceph_cmd(f'fs subvolume snapshot create {self.volname} {sv1} {ss1}') + self.run_ceph_cmd('config set mgr mgr/volumes/snapshot_clone_delay 2') + self.run_ceph_cmd(f'fs subvolume snapshot clone {self.volname} {sv1} {ss1} {c1}') + + self.mount_a.run_shell(f'sudo rmdir {sv_path}', omit_sudo=False) + + time.sleep(2) + self._wait_for_clone_to_fail(c1, timo=20) + + # cleanup + self.run_ceph_cmd(f'fs subvolume snapshot rm {self.volname} {sv1} {ss1} ' + '--force') + self.run_ceph_cmd(f'fs subvolume rm {self.volname} {sv1} --force') diff --git a/src/pybind/mgr/volumes/fs/async_cloner.py b/src/pybind/mgr/volumes/fs/async_cloner.py index c22c6ee4816..3522c67d443 100644 --- a/src/pybind/mgr/volumes/fs/async_cloner.py +++ b/src/pybind/mgr/volumes/fs/async_cloner.py @@ -228,12 +228,12 @@ def do_clone(fs_client, volspec, volname, groupname, subvolname, should_cancel): set_quota_on_clone(fs_handle, (subvol0, subvol1, subvol2)) def update_clone_failure_status(fs_client, volspec, volname, groupname, subvolname, ve): - with open_clone_subvol_pair_in_vol(fs_client, volspec, volname, groupname, - subvolname, lockless=False) as (subvol0, subvol1, subvol2): + with open_at_volume(fs_client, volspec, volname, groupname, subvolname, + SubvolumeOpType.CLONE_INTERNAL) as clone: if ve.errno == -errno.EINTR: - subvol0.add_clone_failure(-ve.errno, "user interrupted clone operation") + clone.add_clone_failure(-ve.errno, "user interrupted clone operation") else: - subvol0.add_clone_failure(-ve.errno, ve.error_str) + clone.add_clone_failure(-ve.errno, ve.error_str) def log_clone_failure(volname, groupname, subvolname, ve): if ve.errno == -errno.EINTR: @@ -261,7 +261,7 @@ def handle_clone_failed(fs_client, volspec, volname, index, groupname, subvolnam try: # detach source but leave the clone section intact for later inspection with open_clone_subvol_pair_in_vol(fs_client, volspec, volname, groupname, - subvolname) as (subvol0, subvol1, subvol2): + subvolname, failed=True) as (subvol0, subvol1, subvol2): subvol1.detach_snapshot(subvol2, index) except (MetadataMgrException, VolumeException) as e: log.error("failed to detach clone from snapshot: {0}".format(e)) diff --git a/src/pybind/mgr/volumes/fs/operations/subvolume.py b/src/pybind/mgr/volumes/fs/operations/subvolume.py index 4182e4f7571..885eec16e3f 100644 --- a/src/pybind/mgr/volumes/fs/operations/subvolume.py +++ b/src/pybind/mgr/volumes/fs/operations/subvolume.py @@ -102,7 +102,8 @@ def open_subvol_in_group(mgr, vol_handle, vol_spec, group_name, subvol_name, @contextmanager def open_clone_subvol_pair_in_vol(vc, vol_spec, vol_name, group_name, - subvol_name, lockless=False): + subvol_name, lockless=False, failed=False): + with open_subvol_in_vol(vc, vol_spec, vol_name, group_name, subvol_name, SubvolumeOpType.CLONE_INTERNAL, lockless) \ as (vol_handle, _, dst_subvol): @@ -113,9 +114,14 @@ def open_clone_subvol_pair_in_vol(vc, vol_spec, vol_name, group_name, # use the same subvolume to avoid metadata overwrites yield (dst_subvol, dst_subvol, src_snap_name) else: + if failed: + op_type = SubvolumeOpType.CLONE_FAILED + else: + op_type = SubvolumeOpType.CLONE_SOURCE + with open_subvol_in_group(vc.mgr, vol_handle, vol_spec, src_group_name, src_subvol_name, - SubvolumeOpType.CLONE_SOURCE) \ + op_type) \ as src_subvol: yield (dst_subvol, src_subvol, src_snap_name) diff --git a/src/pybind/mgr/volumes/fs/operations/template.py b/src/pybind/mgr/volumes/fs/operations/template.py index 2189e20ce13..7b1cef1907c 100644 --- a/src/pybind/mgr/volumes/fs/operations/template.py +++ b/src/pybind/mgr/volumes/fs/operations/template.py @@ -58,6 +58,7 @@ class SubvolumeOpType(Enum): CLONE_CREATE = 'clone-create' CLONE_STATUS = 'clone-status' CLONE_CANCEL = 'clone-cancel' + CLONE_FAILED = 'clone-failed' CLONE_INTERNAL = 'clone_internal' ALLOW_ACCESS = 'allow-access' DENY_ACCESS = 'deny-access' diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py index 164448133e0..e4aa2591bf9 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py @@ -288,7 +288,8 @@ class SubvolumeV2(SubvolumeV1): SubvolumeOpType.SNAP_INFO, SubvolumeOpType.SNAP_PROTECT, SubvolumeOpType.SNAP_UNPROTECT, - SubvolumeOpType.CLONE_SOURCE + SubvolumeOpType.CLONE_SOURCE, + SubvolumeOpType.CLONE_FAILED } return {SubvolumeOpType.REMOVE_FORCE, @@ -296,7 +297,8 @@ class SubvolumeV2(SubvolumeV1): SubvolumeOpType.CLONE_STATUS, SubvolumeOpType.CLONE_CANCEL, SubvolumeOpType.CLONE_INTERNAL, - SubvolumeOpType.CLONE_SOURCE} + SubvolumeOpType.CLONE_SOURCE, + SubvolumeOpType.CLONE_FAILED} def open(self, op_type): if not isinstance(op_type, SubvolumeOpType): @@ -343,6 +345,11 @@ class SubvolumeV2(SubvolumeV1): f"path '{subvol_path}' for subvolume " f"{self.subvolname}'") return + elif op_type == SubvolumeOpType.CLONE_FAILED: + log.debug('since clone failed, letting that register in .meta ' + 'file and ignoring missing subvolume path ' + f'{subvol_path} for subvolume {self.subvolname}') + return log.debug("missing subvolume path '{0}' for subvolume '{1}'".format(subvol_path, self.subvolname)) raise VolumeException(-errno.ENOENT, "mount path missing for subvolume '{0}'".format(self.subvolname)) except cephfs.Error as e: -- 2.47.3