From: Kotresh HR Date: Wed, 6 Jul 2022 11:59:39 +0000 (+0530) Subject: mgr/volumes: Allow forceful snapshot removal on osd full X-Git-Tag: v18.0.0~202^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0687f78650dd348619b06e20c299f82f2a0c1bf5;p=ceph-ci.git mgr/volumes: Allow forceful snapshot removal on osd full When the OSD is full, a snapshot that has metadata set can't be removed, because user metadata can't be removed while the OSD is full. This patch provides a way to remove the snapshot with the 'force' option while keeping the corresponding metadata, which gets removed on subvolume discovery once space is available. Fixes: https://tracker.ceph.com/issues/55976 Signed-off-by: Kotresh HR --- diff --git a/qa/workunits/fs/full/subvolume_snapshot_rm.sh b/qa/workunits/fs/full/subvolume_snapshot_rm.sh index a891ab255ab..f6d0add9fda 100755 --- a/qa/workunits/fs/full/subvolume_snapshot_rm.sh +++ b/qa/workunits/fs/full/subvolume_snapshot_rm.sh @@ -66,9 +66,11 @@ echo "After write" df $CEPH_MNT ceph osd df +# Snapshot removal with force option should succeed +ceph fs subvolume snapshot rm cephfs sub_0 snap_0 --force + #Cleanup from backend ignore_failure sudo rm -f /tmp/error_${PID}_file -ignore_failure sudo rmdir $CEPH_MNT/volumes/_nogroup/sub_0/.snap/snap_0 ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/sub_0 #Set the ratios back for other full tests to run diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py index e2857cec3d1..0e2a64d48ea 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py @@ -806,7 +806,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): return pending_clones_info - def remove_snapshot(self, snapname): + def remove_snapshot(self, snapname, force=False): if self.has_pending_clones(snapname): raise VolumeException(-errno.EAGAIN, "snapshot '{0}' 
has pending clones".format(snapname)) snappath = self.snapshot_path(snapname) @@ -814,9 +814,16 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): self.metadata_mgr.remove_section(self.get_snap_section_name(snapname)) self.metadata_mgr.flush() except MetadataMgrException as me: - log.error(f"Failed to remove snapshot metadata on snap={snapname} subvol={self.subvol_name} " - f"group={self.group_name} reason={me.args[1]}, errno:{-me.args[0]}, {os.strerror(-me.args[0])}") - raise VolumeException(-errno.EAGAIN, f"failed to remove snapshot metadata on snap={snapname} reason={me.args[0]} {me.args[1]}") + if force: + log.info(f"Allowing snapshot removal on failure of it's metadata removal with force on " + f"snap={snapname} subvol={self.subvol_name} group={self.group_name} reason={me.args[1]}, " + f"errno:{-me.args[0]}, {os.strerror(-me.args[0])}") + pass + else: + log.error(f"Failed to remove snapshot metadata on snap={snapname} subvol={self.subvol_name} " + f"group={self.group_name} reason={me.args[1]}, errno:{-me.args[0]}, {os.strerror(-me.args[0])}") + raise VolumeException(-errno.EAGAIN, + f"failed to remove snapshot metadata on snap={snapname} reason={me.args[0]} {me.args[1]}") rmsnap(self.fs, snappath) def snapshot_info(self, snapname): diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py index 31d5f2443c5..03085d04971 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py @@ -385,8 +385,8 @@ class SubvolumeV2(SubvolumeV1): return {'type': self.subvol_type.value, 'features': self.features, 'state': SubvolumeStates.STATE_RETAINED.value} - def remove_snapshot(self, snapname): - super(SubvolumeV2, self).remove_snapshot(snapname) + def remove_snapshot(self, snapname, force=False): + super(SubvolumeV2, self).remove_snapshot(snapname, force) if self.purgeable: self.trash_base_dir() # tickle the 
volume purge job to purge this entry, using ESTALE diff --git a/src/pybind/mgr/volumes/fs/volume.py b/src/pybind/mgr/volumes/fs/volume.py index cef35ad1546..cfc7480d6b7 100644 --- a/src/pybind/mgr/volumes/fs/volume.py +++ b/src/pybind/mgr/volumes/fs/volume.py @@ -519,7 +519,7 @@ class VolumeClient(CephfsClient["Module"]): with open_volume(self, volname) as fs_handle: with open_group(fs_handle, self.volspec, groupname) as group: with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.SNAP_REMOVE) as subvolume: - subvolume.remove_snapshot(snapname) + subvolume.remove_snapshot(snapname, force) except VolumeException as ve: # ESTALE serves as an error to state that subvolume is currently stale due to internal removal and, # we should tickle the purge jobs to purge the same