From a08ddab34abefaab5647e87552f3095c329d6b39 Mon Sep 17 00:00:00 2001 From: Avan Thakkar Date: Tue, 17 Sep 2024 12:57:11 +0530 Subject: [PATCH] mgr/volumes: add earmarking for subvol - Implemented earmarking functionality for subvolumes and subvolume groups, ensuring that different file services (NFS and SMB) do not share the same subvolume or subvolume group, to avoid data corruption due to unsupported mixed protocol use. - Added commands to get/set/rm the earmark of a given subvolume / subvolume group - Adding __init__.py for python-common/ceph/fs dir to have it as a package. - Fixed subvolume info command when no earmark is set. Fixes: https://tracker.ceph.com/issues/67460 Signed-off-by: Avan Thakkar --- .../mgr/volumes/fs/operations/subvolume.py | 5 +- .../mgr/volumes/fs/operations/template.py | 3 + .../fs/operations/versions/subvolume_base.py | 28 ++++- .../fs/operations/versions/subvolume_v1.py | 5 +- .../fs/operations/versions/subvolume_v2.py | 5 +- src/pybind/mgr/volumes/fs/volume.py | 74 +++++++++++- src/pybind/mgr/volumes/module.py | 55 ++++++++- src/python-common/ceph/fs/__init__.py | 3 + src/python-common/ceph/fs/earmarking.py | 108 ++++++++++++++++++ 9 files changed, 272 insertions(+), 14 deletions(-) create mode 100644 src/python-common/ceph/fs/__init__.py create mode 100644 src/python-common/ceph/fs/earmarking.py diff --git a/src/pybind/mgr/volumes/fs/operations/subvolume.py b/src/pybind/mgr/volumes/fs/operations/subvolume.py index f982f73bf7df2..256c8d7607492 100644 --- a/src/pybind/mgr/volumes/fs/operations/subvolume.py +++ b/src/pybind/mgr/volumes/fs/operations/subvolume.py @@ -5,7 +5,7 @@ from .group import open_group from .template import SubvolumeOpType from .versions import loaded_subvolumes -def create_subvol(mgr, fs, vol_spec, group, subvolname, size, isolate_nspace, pool, mode, uid, gid): +def create_subvol(mgr, fs, vol_spec, group, subvolname, size, isolate_nspace, pool, mode, uid, gid, earmark): """ create a subvolume (create a subvolume with the max known version). 
@@ -18,10 +18,11 @@ def create_subvol(mgr, fs, vol_spec, group, subvolname, size, isolate_nspace, po :param mode: the user permissions :param uid: the user identifier :param gid: the group identifier + :param earmark: metadata string to identify if subvolume is associated with nfs/smb :return: None """ subvolume = loaded_subvolumes.get_subvolume_object_max(mgr, fs, vol_spec, group, subvolname) - subvolume.create(size, isolate_nspace, pool, mode, uid, gid) + subvolume.create(size, isolate_nspace, pool, mode, uid, gid, earmark) def create_clone(mgr, fs, vol_spec, group, subvolname, pool, source_volume, source_subvolume, snapname): diff --git a/src/pybind/mgr/volumes/fs/operations/template.py b/src/pybind/mgr/volumes/fs/operations/template.py index eb55bd7432519..2436863fd1798 100644 --- a/src/pybind/mgr/volumes/fs/operations/template.py +++ b/src/pybind/mgr/volumes/fs/operations/template.py @@ -68,6 +68,9 @@ class SubvolumeOpType(Enum): SNAP_METADATA_GET = 'snap-metadata-get' SNAP_METADATA_LIST = 'snap-metadata-ls' SNAP_METADATA_REMOVE = 'snap-metadata-rm' + EARMARK_GET = 'earmark-get' + EARMARK_SET = 'earmark-set' + EARMARK_CLEAR = 'earmark-clear' class SubvolumeTemplate(object): VERSION = None # type: int diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py index 8fbe177e5f4b5..05df31014170a 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py @@ -18,6 +18,8 @@ from ...exception import MetadataMgrException, VolumeException from .auth_metadata import AuthMetadataManager from .subvolume_attrs import SubvolumeStates +from ceph.fs.earmarking import CephFSVolumeEarmarking, EarmarkException # type: ignore + log = logging.getLogger(__name__) @@ -192,6 +194,14 @@ class SubvolumeBase(object): except cephfs.NoData: attrs["quota"] = None + try: + fs_earmark = CephFSVolumeEarmarking(self.fs, 
pathname) + attrs["earmark"] = fs_earmark.get_earmark() + except cephfs.NoData: + attrs["earmark"] = '' + except EarmarkException: + attrs["earmark"] = '' + return attrs def set_attrs(self, path, attrs): @@ -277,6 +287,12 @@ class SubvolumeBase(object): if mode is not None: self.fs.lchmod(path, mode) + # set earmark + earmark = attrs.get("earmark") + if earmark is not None: + fs_earmark = CephFSVolumeEarmarking(self.fs, path) + fs_earmark.set_earmark(earmark) + def _resize(self, path, newsize, noshrink): try: newsize = int(newsize) @@ -418,6 +434,14 @@ class SubvolumeBase(object): except cephfs.Error as e: raise VolumeException(-e.args[0], e.args[1]) + try: + fs_earmark = CephFSVolumeEarmarking(self.fs, subvolpath) + earmark = fs_earmark.get_earmark() + except cephfs.NoData: + earmark = '' + except EarmarkException: + earmark = '' + return {'path': subvolpath, 'type': etype.value, 'uid': int(st["uid"]), @@ -434,7 +458,9 @@ class SubvolumeBase(object): if nsize == 0 else '{0:.2f}'.format((float(usedbytes) / nsize) * 100.0), 'pool_namespace': pool_namespace, - 'features': self.features, 'state': self.state.value} + 'features': self.features, + 'state': self.state.value, + 'earmark': earmark} def set_user_metadata(self, keyname, value): try: diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py index 41c01c843b08a..33d364b8b452a 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py @@ -85,7 +85,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): """ Path to user data directory within a subvolume snapshot named 'snapname' """ return self.snapshot_path(snapname) - def create(self, size, isolate_nspace, pool, mode, uid, gid): + def create(self, size, isolate_nspace, pool, mode, uid, gid, earmark): subvolume_type = SubvolumeTypes.TYPE_NORMAL try: initial_state = 
SubvolumeOpSm.get_init_state(subvolume_type) @@ -103,7 +103,8 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): 'gid': gid, 'data_pool': pool, 'pool_namespace': self.namespace if isolate_nspace else None, - 'quota': size + 'quota': size, + 'earmark': earmark } self.set_attrs(subvol_path, attrs) diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py index 55d7f945b7750..bec271f659fbf 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py @@ -154,7 +154,7 @@ class SubvolumeV2(SubvolumeV1): self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, qpath) self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, initial_state.value) - def create(self, size, isolate_nspace, pool, mode, uid, gid): + def create(self, size, isolate_nspace, pool, mode, uid, gid, earmark): subvolume_type = SubvolumeTypes.TYPE_NORMAL try: initial_state = SubvolumeOpSm.get_init_state(subvolume_type) @@ -175,7 +175,8 @@ class SubvolumeV2(SubvolumeV1): 'gid': gid, 'data_pool': pool, 'pool_namespace': self.namespace if isolate_nspace else None, - 'quota': size + 'quota': size, + 'earmark': earmark } self.set_attrs(subvol_path, attrs) diff --git a/src/pybind/mgr/volumes/fs/volume.py b/src/pybind/mgr/volumes/fs/volume.py index 3f3b16020494d..43ca060607d03 100644 --- a/src/pybind/mgr/volumes/fs/volume.py +++ b/src/pybind/mgr/volumes/fs/volume.py @@ -4,11 +4,13 @@ import logging import mgr_util import inspect import functools -from typing import TYPE_CHECKING, Any, Callable, Optional +from typing import TYPE_CHECKING, Any, Callable, Optional, Tuple from urllib.parse import urlsplit, urlunsplit import cephfs +from ceph.fs.earmarking import CephFSVolumeEarmarking, EarmarkException # type: ignore + from mgr_util import CephfsClient from .fs_util import listdir, has_subdir @@ -229,11 
+231,13 @@ class VolumeClient(CephfsClient["Module"]): gid = kwargs['gid'] mode = kwargs['mode'] isolate_nspace = kwargs['namespace_isolated'] + earmark = kwargs['earmark'] or '' # if not set, default to empty string --> no earmark oct_mode = octal_str_to_decimal_int(mode) + try: create_subvol( - self.mgr, fs_handle, self.volspec, group, subvolname, size, isolate_nspace, pool, oct_mode, uid, gid) + self.mgr, fs_handle, self.volspec, group, subvolname, size, isolate_nspace, pool, oct_mode, uid, gid, earmark) except VolumeException as ve: # kick the purge threads for async removal -- note that this # assumes that the subvolume is moved to trashcan for cleanup on error. @@ -251,6 +255,7 @@ class VolumeClient(CephfsClient["Module"]): gid = kwargs['gid'] mode = kwargs['mode'] isolate_nspace = kwargs['namespace_isolated'] + earmark = kwargs['earmark'] or '' # if not set, default to empty string --> no earmark try: with open_volume(self, volname) as fs_handle: @@ -264,7 +269,8 @@ class VolumeClient(CephfsClient["Module"]): 'mode': octal_str_to_decimal_int(mode), 'data_pool': pool, 'pool_namespace': subvolume.namespace if isolate_nspace else None, - 'quota': size + 'quota': size, + 'earmark': earmark } subvolume.set_attrs(subvolume.path, attrs) except VolumeException as ve: @@ -606,6 +612,68 @@ class VolumeClient(CephfsClient["Module"]): ret = self.volume_exception_to_retval(ve) return ret + def get_earmark(self, **kwargs) -> Tuple[int, Optional[str], str]: + ret: Tuple[int, Optional[str], str] = 0, "", "" + volname = kwargs['vol_name'] + subvolname = kwargs['sub_name'] + groupname = kwargs['group_name'] + + try: + with open_volume(self, volname) as fs_handle: + with open_group(fs_handle, self.volspec, groupname) as group: + with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.EARMARK_GET) as subvolume: + log.info("Getting earmark for subvolume %s", subvolume.path) + fs_earmark = CephFSVolumeEarmarking(fs_handle, subvolume.path) + earmark 
= fs_earmark.get_earmark() + ret = 0, earmark, "" + except VolumeException as ve: + ret = self.volume_exception_to_retval(ve) + except EarmarkException as ee: + log.error(f"Earmark error occurred: {ee}") + ret = ee.to_tuple() + return ret + + def set_earmark(self, **kwargs): # type: ignore + ret = 0, "", "" + volname = kwargs['vol_name'] + subvolname = kwargs['sub_name'] + groupname = kwargs['group_name'] + earmark = kwargs['earmark'] + + try: + with open_volume(self, volname) as fs_handle: + with open_group(fs_handle, self.volspec, groupname) as group: + with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.EARMARK_SET) as subvolume: + log.info("Setting earmark %s for subvolume %s", earmark, subvolume.path) + fs_earmark = CephFSVolumeEarmarking(fs_handle, subvolume.path) + fs_earmark.set_earmark(earmark) + except VolumeException as ve: + ret = self.volume_exception_to_retval(ve) + except EarmarkException as ee: + log.error(f"Earmark error occurred: {ee}") + ret = ee.to_tuple() # type: ignore + return ret + + def clear_earmark(self, **kwargs): # type: ignore + ret = 0, "", "" + volname = kwargs['vol_name'] + subvolname = kwargs['sub_name'] + groupname = kwargs['group_name'] + + try: + with open_volume(self, volname) as fs_handle: + with open_group(fs_handle, self.volspec, groupname) as group: + with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.EARMARK_CLEAR) as subvolume: + log.info("Removing earmark for subvolume %s", subvolume.path) + fs_earmark = CephFSVolumeEarmarking(fs_handle, subvolume.path) + fs_earmark.clear_earmark() + except VolumeException as ve: + ret = self.volume_exception_to_retval(ve) + except EarmarkException as ee: + log.error(f"Earmark error occurred: {ee}") + ret = ee.to_tuple() # type: ignore + return ret + ### subvolume snapshot def create_subvolume_snapshot(self, **kwargs): diff --git a/src/pybind/mgr/volumes/module.py b/src/pybind/mgr/volumes/module.py index 
e059648261e9e..6d768457f19b6 100644 --- a/src/pybind/mgr/volumes/module.py +++ b/src/pybind/mgr/volumes/module.py @@ -141,7 +141,8 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): 'name=uid,type=CephInt,req=false ' 'name=gid,type=CephInt,req=false ' 'name=mode,type=CephString,req=false ' - 'name=namespace_isolated,type=CephBool,req=false ', + 'name=namespace_isolated,type=CephBool,req=false ' + 'name=earmark,type=CephString,req=false ', 'desc': "Create a CephFS subvolume in a volume, and optionally, " "with a specific size (in bytes), a specific data pool layout, " "a specific mode, in a specific subvolume group and in separate " @@ -272,6 +273,31 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): "and optionally, in a specific subvolume group", 'perm': 'rw' }, + { + 'cmd': 'fs subvolume earmark get ' + 'name=vol_name,type=CephString ' + 'name=sub_name,type=CephString ' + 'name=group_name,type=CephString,req=false ', + 'desc': "Get earmark for a subvolume", + 'perm': 'r' + }, + { + 'cmd': 'fs subvolume earmark set ' + 'name=vol_name,type=CephString ' + 'name=sub_name,type=CephString ' + 'name=group_name,type=CephString,req=false ' + 'name=earmark,type=CephString ', + 'desc': "Set earmark for a subvolume", + 'perm': 'rw' + }, + { + 'cmd': 'fs subvolume earmark rm ' + 'name=vol_name,type=CephString ' + 'name=sub_name,type=CephString ' + 'name=group_name,type=CephString,req=false ', + 'desc': "Remove earmark from a subvolume", + 'perm': 'rw' + }, { 'cmd': 'fs quiesce ' 'name=vol_name,type=CephString ' @@ -631,6 +657,7 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): group_name=cmd['group_name'], new_size=cmd['new_size'], no_shrink=cmd.get('no_shrink', False)) + @mgr_cmd_wrap def _cmd_fs_subvolumegroup_ls(self, inbuf, cmd): return self.vc.list_subvolume_groups(vol_name=cmd['vol_name']) @@ -652,7 +679,8 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): uid=cmd.get('uid', None), gid=cmd.get('gid', None), 
mode=cmd.get('mode', '755'), - namespace_isolated=cmd.get('namespace_isolated', False)) + namespace_isolated=cmd.get('namespace_isolated', False), + earmark=cmd.get('earmark', None)) @mgr_cmd_wrap def _cmd_fs_subvolume_rm(self, inbuf, cmd): @@ -733,7 +761,7 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): def _cmd_fs_subvolume_exist(self, inbuf, cmd): return self.vc.subvolume_exists(vol_name=cmd['vol_name'], group_name=cmd.get('group_name', None)) - + @mgr_cmd_wrap def _cmd_fs_subvolume_metadata_set(self, inbuf, cmd): return self.vc.set_user_metadata(vol_name=cmd['vol_name'], @@ -762,7 +790,26 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): key_name=cmd['key_name'], group_name=cmd.get('group_name', None), force=cmd.get('force', False)) - + + @mgr_cmd_wrap + def _cmd_fs_subvolume_earmark_get(self, inbuf, cmd): + return self.vc.get_earmark(vol_name=cmd['vol_name'], + sub_name=cmd['sub_name'], + group_name=cmd.get('group_name', None)) + + @mgr_cmd_wrap + def _cmd_fs_subvolume_earmark_set(self, inbuf, cmd): + return self.vc.set_earmark(vol_name=cmd['vol_name'], + sub_name=cmd['sub_name'], + group_name=cmd.get('group_name', None), + earmark=cmd['earmark']) + + @mgr_cmd_wrap + def _cmd_fs_subvolume_earmark_rm(self, inbuf, cmd): + return self.vc.clear_earmark(vol_name=cmd['vol_name'], + sub_name=cmd['sub_name'], + group_name=cmd.get('group_name', None)) + @mgr_cmd_wrap def _cmd_fs_quiesce(self, inbuf, cmd): return self.vc.quiesce(cmd) diff --git a/src/python-common/ceph/fs/__init__.py b/src/python-common/ceph/fs/__init__.py new file mode 100644 index 0000000000000..3988bf129e242 --- /dev/null +++ b/src/python-common/ceph/fs/__init__.py @@ -0,0 +1,3 @@ +import logging + +log = logging.getLogger(__name__) diff --git a/src/python-common/ceph/fs/earmarking.py b/src/python-common/ceph/fs/earmarking.py new file mode 100644 index 0000000000000..3d11da933397f --- /dev/null +++ b/src/python-common/ceph/fs/earmarking.py @@ -0,0 +1,108 @@ +""" 
+Module: CephFS Volume Earmarking + +This module provides the `CephFSVolumeEarmarking` class, which is designed to manage the earmarking +of subvolumes within a CephFS filesystem. The earmarking mechanism allows +administrators to tag specific subvolumes with identifiers that indicate their intended use +such as NFS or SMB, ensuring that only one file service is assigned to a particular subvolume +at a time. This is crucial to prevent data corruption in environments where +mixed protocol support (NFS and SMB) is not yet available. + +Key Features: +- **Set Earmark**: Assigns an earmark to a subvolume. +- **Get Earmark**: Retrieves the existing earmark of a subvolume, if any. +- **Remove Earmark**: Removes the earmark from a subvolume, making it available for reallocation. +- **Validate Earmark**: Ensures that the earmark follows the correct format and only uses +supported top-level scopes. +""" + +import errno +import enum +import logging +from typing import Optional, Tuple + +log = logging.getLogger(__name__) + +XATTR_SUBVOLUME_EARMARK_NAME = 'user.ceph.subvolume.earmark' + + +class EarmarkTopScope(enum.Enum): + NFS = "nfs" + SMB = "smb" + + +class EarmarkException(Exception): + def __init__(self, error_code: int, error_message: str) -> None: + self.errno = error_code + self.error_str = error_message + + def to_tuple(self) -> Tuple[int, Optional[str], str]: + return self.errno, "", self.error_str + + def __str__(self) -> str: + return f"{self.errno} ({self.error_str})" + + +class CephFSVolumeEarmarking: + def __init__(self, fs, path: str) -> None: + self.fs = fs + self.path = path + + def _handle_cephfs_error(self, e: Exception, action: str) -> None: + if isinstance(e, ValueError): + raise EarmarkException(errno.EINVAL, f"Invalid earmark specified: {e}") from e + elif isinstance(e, OSError): + log.error(f"Error {action} earmark: {e}") + raise EarmarkException(-e.errno, e.strerror) from e + else: + log.error(f"Unexpected error {action} earmark: {e}") + raise 
EarmarkException(errno.EIO, "Unexpected error") from e + + def _validate_earmark(self, earmark: str) -> bool: + """ + Validates that the earmark string is either empty or composed of parts separated by scopes, + with the top-level scope being either 'nfs' or 'smb'. + + :param earmark: The earmark string to validate. + :return: True if valid, False otherwise. + """ + if not earmark or earmark in (scope.value for scope in EarmarkTopScope): + return True + + parts = earmark.split('.') + + if parts[0] not in (scope.value for scope in EarmarkTopScope): + return False + + # Check if all parts are non-empty (to ensure valid dot-separated format) + return all(parts) + + def get_earmark(self) -> Optional[str]: + try: + earmark_value = ( + self.fs.getxattr(self.path, XATTR_SUBVOLUME_EARMARK_NAME) + .decode('utf-8') + ) + return earmark_value + except Exception as e: + self._handle_cephfs_error(e, "getting") + return None + + def set_earmark(self, earmark: str): + # Validate the earmark before attempting to set it + if not self._validate_earmark(earmark): + raise EarmarkException( + errno.EINVAL, + f"Invalid earmark specified: '{earmark}'. " + "A valid earmark should either be empty or start with 'nfs' or 'smb', " + "followed by dot-separated non-empty components." + ) + + try: + self.fs.setxattr(self.path, XATTR_SUBVOLUME_EARMARK_NAME, earmark.encode('utf-8'), 0) + log.info(f"Earmark '{earmark}' set on {self.path}.") + except Exception as e: + self._handle_cephfs_error(e, "setting") + + def clear_earmark(self) -> None: + self.set_earmark("") -- 2.39.5