From: Kotresh HR Date: Wed, 6 Jul 2022 11:59:39 +0000 (+0530) Subject: mgr/volumes: Allow forceful snapshot removal on osd full X-Git-Tag: v16.2.11~278^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0a748bb10535a2858b97d6253a6277f38be7daa3;p=ceph.git mgr/volumes: Allow forceful snapshot removal on osd full When the osd is full, a snapshot that has metadata set can't be removed, because its user metadata can't be removed while the osd is full. This patch provides a way to remove the snapshot with the 'force' option while keeping the corresponding metadata, which is removed later, on subvolume discovery, once space is available. Fixes: https://tracker.ceph.com/issues/55976 Signed-off-by: Kotresh HR (cherry picked from commit 0687f78650dd348619b06e20c299f82f2a0c1bf5) --- diff --git a/qa/workunits/fs/full/subvolume_snapshot_rm.sh b/qa/workunits/fs/full/subvolume_snapshot_rm.sh index a891ab255ab2..f6d0add9fda4 100755 --- a/qa/workunits/fs/full/subvolume_snapshot_rm.sh +++ b/qa/workunits/fs/full/subvolume_snapshot_rm.sh @@ -66,9 +66,11 @@ echo "After write" df $CEPH_MNT ceph osd df +# Snapshot removal with force option should succeed +ceph fs subvolume snapshot rm cephfs sub_0 snap_0 --force + #Cleanup from backend ignore_failure sudo rm -f /tmp/error_${PID}_file -ignore_failure sudo rmdir $CEPH_MNT/volumes/_nogroup/sub_0/.snap/snap_0 ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/sub_0 #Set the ratios back for other full tests to run diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py index e2857cec3d18..0e2a64d48eac 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py @@ -806,7 +806,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): return pending_clones_info - def remove_snapshot(self, snapname): + def remove_snapshot(self, snapname, force=False): if
self.has_pending_clones(snapname): raise VolumeException(-errno.EAGAIN, "snapshot '{0}' has pending clones".format(snapname)) snappath = self.snapshot_path(snapname) @@ -814,9 +814,16 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): self.metadata_mgr.remove_section(self.get_snap_section_name(snapname)) self.metadata_mgr.flush() except MetadataMgrException as me: - log.error(f"Failed to remove snapshot metadata on snap={snapname} subvol={self.subvol_name} " - f"group={self.group_name} reason={me.args[1]}, errno:{-me.args[0]}, {os.strerror(-me.args[0])}") - raise VolumeException(-errno.EAGAIN, f"failed to remove snapshot metadata on snap={snapname} reason={me.args[0]} {me.args[1]}") + if force: + log.info(f"Allowing snapshot removal on failure of it's metadata removal with force on " + f"snap={snapname} subvol={self.subvol_name} group={self.group_name} reason={me.args[1]}, " + f"errno:{-me.args[0]}, {os.strerror(-me.args[0])}") + pass + else: + log.error(f"Failed to remove snapshot metadata on snap={snapname} subvol={self.subvol_name} " + f"group={self.group_name} reason={me.args[1]}, errno:{-me.args[0]}, {os.strerror(-me.args[0])}") + raise VolumeException(-errno.EAGAIN, + f"failed to remove snapshot metadata on snap={snapname} reason={me.args[0]} {me.args[1]}") rmsnap(self.fs, snappath) def snapshot_info(self, snapname): diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py index 31d5f2443c50..03085d049713 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py @@ -385,8 +385,8 @@ class SubvolumeV2(SubvolumeV1): return {'type': self.subvol_type.value, 'features': self.features, 'state': SubvolumeStates.STATE_RETAINED.value} - def remove_snapshot(self, snapname): - super(SubvolumeV2, self).remove_snapshot(snapname) + def remove_snapshot(self, snapname, force=False): + super(SubvolumeV2, 
self).remove_snapshot(snapname, force) if self.purgeable: self.trash_base_dir() # tickle the volume purge job to purge this entry, using ESTALE diff --git a/src/pybind/mgr/volumes/fs/volume.py b/src/pybind/mgr/volumes/fs/volume.py index 2e83712453bb..a67a304b6b07 100644 --- a/src/pybind/mgr/volumes/fs/volume.py +++ b/src/pybind/mgr/volumes/fs/volume.py @@ -504,7 +504,7 @@ class VolumeClient(CephfsClient["Module"]): with open_volume(self, volname) as fs_handle: with open_group(fs_handle, self.volspec, groupname) as group: with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.SNAP_REMOVE) as subvolume: - subvolume.remove_snapshot(snapname) + subvolume.remove_snapshot(snapname, force) except VolumeException as ve: # ESTALE serves as an error to state that subvolume is currently stale due to internal removal and, # we should tickle the purge jobs to purge the same