From 18b85c53af36d89a8c53b40cfc44fe06816a9733 Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Wed, 12 Jan 2022 15:01:53 +0530 Subject: [PATCH] mgr/volumes: Fix subvolume snapshot clone failure Problem: The subvolume snapshot clone fails if the quota on the source has been exceeded. Since the quota is not strictly enforced at the byte range, this is a possibility. Cause: The quota on the clone is set prior to copying the data from the source. Hence the quota mostly gets enforced before copying the entire data from the source resulting in the clone failure. Solution: Enforce quota on the clone after the data is copied. Fixes: https://tracker.ceph.com/issues/53848 Signed-off-by: Kotresh HR --- src/pybind/mgr/volumes/fs/async_cloner.py | 16 ++++++++++++++++ .../fs/operations/versions/subvolume_v1.py | 7 +++++++ .../fs/operations/versions/subvolume_v2.py | 7 +++++++ 3 files changed, 30 insertions(+) diff --git a/src/pybind/mgr/volumes/fs/async_cloner.py b/src/pybind/mgr/volumes/fs/async_cloner.py index d1854712b16ec..68ebb9e263a7f 100644 --- a/src/pybind/mgr/volumes/fs/async_cloner.py +++ b/src/pybind/mgr/volumes/fs/async_cloner.py @@ -4,6 +4,7 @@ import time import errno import logging from contextlib import contextmanager +from typing import Dict, Union import cephfs from mgr_util import lock_timeout_log @@ -185,12 +186,27 @@ def bulk_copy(fs_handle, source_path, dst_path, should_cancel): if should_cancel(): raise VolumeException(-errno.EINTR, "clone operation interrupted") +def set_quota_on_clone(fs_handle, clone_volumes_pair): + attrs = {} # type: Dict[str, Union[int, str, None]] + src_path = clone_volumes_pair[1].snapshot_data_path(clone_volumes_pair[2]) + dst_path = clone_volumes_pair[0].path + try: + attrs["quota"] = int(fs_handle.getxattr(src_path, + 'ceph.quota.max_bytes' + ).decode('utf-8')) + except cephfs.NoData: + attrs["quota"] = None + + if attrs["quota"] is not None: + clone_volumes_pair[0].set_attrs(dst_path, attrs) + def do_clone(fs_client, volspec, volname, 
groupname, subvolname, should_cancel): with open_volume_lockless(fs_client, volname) as fs_handle: with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, groupname, subvolname) as clone_volumes: src_path = clone_volumes[1].snapshot_data_path(clone_volumes[2]) dst_path = clone_volumes[0].path bulk_copy(fs_handle, src_path, dst_path, should_cancel) + set_quota_on_clone(fs_handle, clone_volumes) def log_clone_failure(volname, groupname, subvolname, ve): if ve.errno == -errno.EINTR: diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py index d62effd995bd9..42c08e04712b1 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py @@ -152,6 +152,13 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): # attributes of subvolume's content though, are synced during the cloning process. attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname)) + # The source of the clone may have exceeded its quota limit as + # CephFS quotas are imprecise. Cloning such a source may fail if + # the quota on the destination is set before starting the clone + # copy. So always set the quota on destination after cloning is + # successful. + attrs["quota"] = None + # override snapshot pool setting, if one is provided for the clone if pool is not None: attrs["data_pool"] = pool diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py index 3937daba61f66..a827bb7a00e5e 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py @@ -219,6 +219,13 @@ class SubvolumeV2(SubvolumeV1): # attributes of subvolume's content though, are synced during the cloning process. 
attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname)) + # The source of the clone may have exceeded its quota limit as + # CephFS quotas are imprecise. Cloning such a source may fail if + # the quota on the destination is set before starting the clone + # copy. So always set the quota on destination after cloning is + # successful. + attrs["quota"] = None + # override snapshot pool setting, if one is provided for the clone if pool is not None: attrs["data_pool"] = pool -- 2.39.5