From: Kotresh HR Date: Wed, 6 Apr 2022 07:16:01 +0000 (+0530) Subject: mgr/volumes: Add clone failure reason in clone status X-Git-Tag: v18.0.0~1050^2~3 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=e0ba36f3344a3633b0343565b63472bbf94538b2;p=ceph-ci.git mgr/volumes: Add clone failure reason in clone status Add the clone failure reason in the clone status. The sample output is as below: $ ceph fs clone status cephfs clone_0 { "status": { "state": "failed", "source": { "volume": "cephfs", "subvolume": "subvolume_0", "snapshot": "snapshot_0", "size": "52428800" }, "failure": { "errno": "2", "error_msg": "snapshot 'snapshot_0' does not exist" } } } Fixes: https://tracker.ceph.com/issues/55190 Signed-off-by: Kotresh HR --- diff --git a/src/pybind/mgr/volumes/fs/async_cloner.py b/src/pybind/mgr/volumes/fs/async_cloner.py index 810397031f5..5a1fcbfdf5d 100644 --- a/src/pybind/mgr/volumes/fs/async_cloner.py +++ b/src/pybind/mgr/volumes/fs/async_cloner.py @@ -106,6 +106,8 @@ def handle_clone_pending(fs_client, volspec, volname, index, groupname, subvolna next_state = SubvolumeOpSm.transition(SubvolumeTypes.TYPE_CLONE, SubvolumeStates.STATE_PENDING, SubvolumeActions.ACTION_CANCELLED) + update_clone_failure_status(fs_client, volspec, volname, groupname, subvolname, + VolumeException(-errno.EINTR, "user interrupted clone operation")) else: next_state = SubvolumeOpSm.transition(SubvolumeTypes.TYPE_CLONE, SubvolumeStates.STATE_PENDING, @@ -184,7 +186,7 @@ def bulk_copy(fs_handle, source_path, dst_path, should_cancel): raise VolumeException(-e.args[0], e.args[1]) cptree(source_path, dst_path) if should_cancel(): - raise VolumeException(-errno.EINTR, "clone operation interrupted") + raise VolumeException(-errno.EINTR, "user interrupted clone operation") def set_quota_on_clone(fs_handle, clone_volumes_pair): src_path = clone_volumes_pair[1].snapshot_data_path(clone_volumes_pair[2]) @@ -225,6 +227,14 @@ def do_clone(fs_client, volspec, volname, groupname, subvolname, should_cancel): bulk_copy(fs_handle, src_path, dst_path, should_cancel) set_quota_on_clone(fs_handle, clone_volumes) +def update_clone_failure_status(fs_client, volspec, volname, groupname, subvolname, ve): + with open_volume_lockless(fs_client, volname) as fs_handle: + with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, groupname, subvolname) as clone_volumes: + if ve.errno == -errno.EINTR: + clone_volumes[0].add_clone_failure(-ve.errno, "user interrupted clone operation") + else: + clone_volumes[0].add_clone_failure(-ve.errno, ve.error_str) + def log_clone_failure(volname, groupname, subvolname, ve): if ve.errno == -errno.EINTR: log.info("Clone cancelled: ({0}, {1}, {2})".format(volname, groupname, subvolname)) @@ -240,6 +250,7 @@ def handle_clone_in_progress(fs_client, volspec, volname, index, groupname, subv SubvolumeStates.STATE_INPROGRESS, SubvolumeActions.ACTION_SUCCESS) except VolumeException as ve: + update_clone_failure_status(fs_client, volspec, volname, groupname, subvolname, ve) log_clone_failure(volname, groupname, subvolname, ve) next_state = get_next_state_on_error(ve.errno) except OpSmException as oe: @@ -349,6 +360,7 @@ class Cloner(AsyncJobs): clone_state, SubvolumeActions.ACTION_CANCELLED) clone_subvolume.state = (next_state, True) + clone_subvolume.add_clone_failure(errno.EINTR, "user interrupted clone operation") s_subvolume.detach_snapshot(s_snapname, track_idx.decode('utf-8')) def cancel_job(self, volname, job): diff --git a/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py b/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py index 1b6c4327837..942353926d6 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py @@ -26,6 +26,10 @@ class MetadataManager(object): GLOBAL_META_KEY_PATH = "path" GLOBAL_META_KEY_STATE = "state" + CLONE_FAILURE_SECTION = "CLONE_FAILURE" + CLONE_FAILURE_META_KEY_ERRNO = "errno" + CLONE_FAILURE_META_KEY_ERROR_MSG = "error_msg" + MAX_IO_BYTES = 8 * 1024 def __init__(self, fs, config_path, mode): diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py index 42c08e04712..0e63b85878d 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py @@ -139,6 +139,14 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): if flush: self.metadata_mgr.flush() + def add_clone_failure(self, errno, error_msg): + self.metadata_mgr.add_section(MetadataManager.CLONE_FAILURE_SECTION) + self.metadata_mgr.update_section(MetadataManager.CLONE_FAILURE_SECTION, + MetadataManager.CLONE_FAILURE_META_KEY_ERRNO, errno) + self.metadata_mgr.update_section(MetadataManager.CLONE_FAILURE_SECTION, + MetadataManager.CLONE_FAILURE_META_KEY_ERROR_MSG, error_msg) + self.metadata_mgr.flush() + def create_clone(self, pool, source_volname, source_subvolume, snapname): subvolume_type = SubvolumeTypes.TYPE_CLONE try: @@ -660,6 +668,13 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): raise VolumeException(-errno.EINVAL, "error fetching subvolume metadata") return clone_source + def _get_clone_failure(self): + clone_failure = { + 'errno' : self.metadata_mgr.get_option(MetadataManager.CLONE_FAILURE_SECTION, MetadataManager.CLONE_FAILURE_META_KEY_ERRNO), + 'error_msg' : self.metadata_mgr.get_option(MetadataManager.CLONE_FAILURE_SECTION, MetadataManager.CLONE_FAILURE_META_KEY_ERROR_MSG), + } + return clone_failure + @property def status(self): state = SubvolumeStates.from_value(self.metadata_mgr.get_global_option(MetadataManager.GLOBAL_META_KEY_STATE)) @@ -669,6 +684,12 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): } if not SubvolumeOpSm.is_complete_state(state) and subvolume_type == SubvolumeTypes.TYPE_CLONE: subvolume_status["source"] = self._get_clone_source() + if SubvolumeOpSm.is_failed_state(state) and subvolume_type == SubvolumeTypes.TYPE_CLONE: + try: + subvolume_status["failure"] = self._get_clone_failure() + except MetadataMgrException: + pass + return subvolume_status @property