From 8c8cec08db382755045b10f24ab1b72bc8f3da72 Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Wed, 12 Jan 2022 15:01:53 +0530 Subject: [PATCH] mgr/volumes: Fix subvolume snapshot clone failure Problem: The subvolume snapshot clone fails if the quota on the source has been exceeded. Since the quota is not strictly enforced at the byte range, this is a possibility. Cause: The quota on the clone is set prior to copying the data from the source. Hence the quota mostly gets enforced before copying the entire data from the source, resulting in the clone failure. Solution: Enforce quota on the clone after the data is copied. Fixes: https://tracker.ceph.com/issues/53848 Signed-off-by: Kotresh HR (cherry picked from commit 18b85c53af36d89a8c53b40cfc44fe06816a9733) --- src/pybind/mgr/volumes/fs/async_cloner.py | 16 ++++++++++++++++ .../fs/operations/versions/subvolume_v1.py | 7 +++++++ .../fs/operations/versions/subvolume_v2.py | 7 +++++++ 3 files changed, 30 insertions(+) diff --git a/src/pybind/mgr/volumes/fs/async_cloner.py b/src/pybind/mgr/volumes/fs/async_cloner.py index d2145eb15458..df3cb93582e9 100644 --- a/src/pybind/mgr/volumes/fs/async_cloner.py +++ b/src/pybind/mgr/volumes/fs/async_cloner.py @@ -4,6 +4,7 @@ import time import errno import logging from contextlib import contextmanager +from typing import Dict, Union import cephfs @@ -184,12 +185,27 @@ def bulk_copy(fs_handle, source_path, dst_path, should_cancel): if should_cancel(): raise VolumeException(-errno.EINTR, "clone operation interrupted") +def set_quota_on_clone(fs_handle, clone_volumes_pair): + attrs = {} # type: Dict[str, Union[int, str, None]] + src_path = clone_volumes_pair[1].snapshot_data_path(clone_volumes_pair[2]) + dst_path = clone_volumes_pair[0].path + try: + attrs["quota"] = int(fs_handle.getxattr(src_path, + 'ceph.quota.max_bytes' + ).decode('utf-8')) + except cephfs.NoData: + attrs["quota"] = None + + if attrs["quota"] is not None: + clone_volumes_pair[0].set_attrs(dst_path, attrs) + def 
do_clone(fs_client, volspec, volname, groupname, subvolname, should_cancel): with open_volume_lockless(fs_client, volname) as fs_handle: with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, groupname, subvolname) as clone_volumes: src_path = clone_volumes[1].snapshot_data_path(clone_volumes[2]) dst_path = clone_volumes[0].path bulk_copy(fs_handle, src_path, dst_path, should_cancel) + set_quota_on_clone(fs_handle, clone_volumes) def log_clone_failure(volname, groupname, subvolname, ve): if ve.errno == -errno.EINTR: diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py index b735ccd3a6f2..d0cf3ec130c0 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py @@ -150,6 +150,13 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): # attributes of subvolume's content though, are synced during the cloning process. attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname)) + # The source of the clone may have exceeded its quota limit as + # CephFS quotas are imprecise. Cloning such a source may fail if + # the quota on the destination is set before starting the clone + # copy. So always set the quota on destination after cloning is + # successful. + attrs["quota"] = None + # override snapshot pool setting, if one is provided for the clone if pool is not None: attrs["data_pool"] = pool diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py index 1dd6f3fe3aa8..f90ec2a4be47 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py @@ -217,6 +217,13 @@ class SubvolumeV2(SubvolumeV1): # attributes of subvolume's content though, are synced during the cloning process. 
attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname)) + # The source of the clone may have exceeded its quota limit as + # CephFS quotas are imprecise. Cloning such a source may fail if + # the quota on the destination is set before starting the clone + # copy. So always set the quota on destination after cloning is + # successful. + attrs["quota"] = None + # override snapshot pool setting, if one is provided for the clone if pool is not None: attrs["data_pool"] = pool -- 2.47.3