From: Shweta Bhosale Date: Wed, 18 Feb 2026 14:29:58 +0000 (+0530) Subject: mgr/nfs: 1. Removed the option to enable and disable cluster wide qos, it will be... X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=99b2729b5ebb93be25c48a3135648532d9d54468;p=ceph.git mgr/nfs: 1. Removed the option to enable and disable cluster-wide QoS; it is now enabled by default. 2. Removed the enable_cluster_qos field from the cluster-level block, as it was causing confusion for the user. 3. Use the prefix 'global' instead of 'cluster' when showing cluster-level QoS values in 'export qos get'. Fixes: https://tracker.ceph.com/issues/69861 Signed-off-by: Shweta Bhosale --- diff --git a/src/pybind/mgr/nfs/cluster.py b/src/pybind/mgr/nfs/cluster.py index 1ac8a518961a..5a4cf057aa22 100644 --- a/src/pybind/mgr/nfs/cluster.py +++ b/src/pybind/mgr/nfs/cluster.py @@ -3,7 +3,6 @@ import logging import re import socket from typing import cast, Dict, List, Any, Union, Optional, TYPE_CHECKING -from enum import Enum from mgr_module import NFS_POOL_NAME as POOL_NAME from ceph.deployment.service_spec import NFSServiceSpec, PlacementSpec, IngressSpec @@ -40,11 +39,6 @@ if TYPE_CHECKING: log = logging.getLogger(__name__) -class ClusterQosAction(Enum): - enable = 'enable' - disable = 'disable' - - def resolve_ip(hostname: str) -> str: try: r = socket.getaddrinfo(hostname, None, flags=socket.AI_CANONNAME, @@ -80,9 +74,7 @@ def config_cluster_qos_from_dict( if not qos_type: raise NFSInvalidOperation('qos_type is not specified in qos dict') qos_type = QOSType[str(qos_type)] - enable_cluster_qos = qos_dict.get(QOSParams.enable_cluster_qos.value, True) clust_qos_msg_interval = int(qos_dict.get(QOSParams.clust_qos_msg_interval.value, 0)) - assert isinstance(enable_cluster_qos, (bool, type(None))) enable_bw_ctrl = qos_dict.get(QOSParams.enable_bw_ctrl.value) combined_bw_ctrl = qos_dict.get(QOSParams.combined_bw_ctrl.value) enable_iops_ctrl = qos_dict.get(QOSParams.enable_iops_ctrl.value) 
@@ -112,7 +104,6 @@ def config_cluster_qos_from_dict( cluster_id=cluster_id, qos_obj=None, enable_qos=True, - enable_cluster_qos=enable_cluster_qos, clust_qos_msg_interval=clust_qos_msg_interval, qos_type=qos_type, bw_obj=bw_obj, @@ -126,7 +117,6 @@ def write_cluster_qos_obj( cluster_id: str, qos_obj: Optional[QOS], enable_qos: bool, - enable_cluster_qos: Optional[bool] = None, clust_qos_msg_interval: int = 0, qos_type: Optional[QOSType] = None, bw_obj: Optional[QOSBandwidthControl] = None, @@ -136,12 +126,11 @@ def write_cluster_qos_obj( qos_obj_exists = False if not qos_obj: log.debug(f"Creating new QoS block for cluster {cluster_id}") - qos_obj = QOS(True, enable_qos, enable_cluster_qos, clust_qos_msg_interval, qos_type, bw_obj, ops_obj) + qos_obj = QOS(True, enable_qos, clust_qos_msg_interval, qos_type, bw_obj, ops_obj) else: log.debug(f"Updating existing QoS block for cluster {cluster_id}") qos_obj_exists = True qos_obj.enable_qos = enable_qos - qos_obj.enable_cluster_qos = enable_cluster_qos qos_obj.clust_qos_msg_interval = validate_clust_qos_msg_interval(clust_qos_msg_interval) qos_obj.qos_type = qos_type if bw_obj: @@ -498,7 +487,6 @@ class NFSCluster: cluster_id: str, qos_obj: Optional[QOS], enable_qos: bool, - enable_cluster_qos: Optional[bool] = None, clust_qos_msg_interval: int = 0, qos_type: Optional[QOSType] = None, bw_obj: Optional[QOSBandwidthControl] = None, @@ -509,7 +497,6 @@ class NFSCluster: cluster_id=cluster_id, qos_obj=qos_obj, enable_qos=enable_qos, - enable_cluster_qos=enable_cluster_qos, clust_qos_msg_interval=clust_qos_msg_interval, qos_type=qos_type, bw_obj=bw_obj, @@ -520,14 +507,13 @@ class NFSCluster: cluster_id: str, qos_obj: Optional[QOS], enable_qos: bool, - enable_cluster_qos: Optional[bool] = None, clust_qos_msg_interval: int = 0, qos_type: Optional[QOSType] = None, bw_obj: Optional[QOSBandwidthControl] = None, ops_obj: Optional[QOSOpsControl] = None) -> None: try: if cluster_id in available_clusters(self.mgr): - 
self.update_cluster_qos_obj(cluster_id, qos_obj, enable_qos, enable_cluster_qos, + self.update_cluster_qos_obj(cluster_id, qos_obj, enable_qos, clust_qos_msg_interval, qos_type, bw_obj, ops_obj) restart_nfs_service(self.mgr, cluster_id) return @@ -592,7 +578,6 @@ class NFSCluster: cluster_id, qos_obj, True, - enable_cluster_qos=True, qos_type=qos_type, bw_obj=bw_obj ) @@ -619,16 +604,14 @@ class NFSCluster: qos_obj = self.get_cluster_qos_config(cluster_id) status = False qos_type = None - enable_cluster_qos = None clust_qos_msg_interval = 0 if qos_obj: status = qos_obj.get_enable_qos_val(disable_bw=True) if status: qos_type = qos_obj.qos_type - enable_cluster_qos = qos_obj.enable_cluster_qos if qos_obj.clust_qos_msg_interval: clust_qos_msg_interval = qos_obj.clust_qos_msg_interval - self.update_cluster_qos(cluster_id, qos_obj, status, enable_cluster_qos, + self.update_cluster_qos(cluster_id, qos_obj, status, clust_qos_msg_interval, qos_type=qos_type, bw_obj=QOSBandwidthControl()) log.info("Cluster-level QoS bandwidth control has been successfully disabled for " f"cluster {cluster_id}. 
As a result, export-level bandwidth control will " @@ -648,7 +631,6 @@ class NFSCluster: cluster_id, qos_obj, True, - enable_cluster_qos=True, qos_type=qos_type, ops_obj=ops_obj ) @@ -665,16 +647,14 @@ class NFSCluster: qos_obj = self.get_cluster_qos_config(cluster_id) status = False qos_type = None - enable_cluster_qos = None clust_qos_msg_interval = 0 if qos_obj: status = qos_obj.get_enable_qos_val(disable_ops=True) if status: qos_type = qos_obj.qos_type - enable_cluster_qos = qos_obj.enable_cluster_qos if qos_obj.clust_qos_msg_interval: clust_qos_msg_interval = qos_obj.clust_qos_msg_interval - self.update_cluster_qos(cluster_id, qos_obj, status, enable_cluster_qos, + self.update_cluster_qos(cluster_id, qos_obj, status, clust_qos_msg_interval, qos_type=qos_type, ops_obj=QOSOpsControl()) log.info("Cluster-level QoS IOPS control has been successfully disabled for " f"cluster {cluster_id}. As a result, export-level ops control will " @@ -684,39 +664,26 @@ class NFSCluster: log.exception(f"Setting NFS-Ganesha QoS IOPS control config failed for {cluster_id}") raise ErrorResponse.wrap(e) - def global_cluster_qos_action( + def cluster_qos_set_config( self, cluster_id: str, - action: str, msg_interval: int = 0 ) -> None: try: qos_obj = self.get_cluster_qos_config(cluster_id) if not qos_obj: - err_msg = f'No existing QoS configuration found for cluster {cluster_id}. Can not {action} cluster-qos' + err_msg = f'No existing QoS configuration found for cluster {cluster_id}.' 
log.error(err_msg) raise Exception(err_msg) - clust_qos_msg_interval = 0 - if action == 'enable': - if (qos_obj.enable_cluster_qos or qos_obj.enable_cluster_qos is None) and not msg_interval: - log.info('Cluster QoS is already enabled') - return - - enable_cluster_qos = True - clust_qos_msg_interval = msg_interval - else: # disable - enable_cluster_qos = False self.update_cluster_qos( cluster_id=cluster_id, qos_obj=qos_obj, enable_qos=qos_obj.enable_qos, - enable_cluster_qos=enable_cluster_qos, - clust_qos_msg_interval=clust_qos_msg_interval, + clust_qos_msg_interval=msg_interval, qos_type=qos_obj.qos_type ) - action_past = "enabled" if action == "enable" else "disabled" - log.info(f"Cluster-level QoS has been successfully {action_past} for cluster {cluster_id}") + log.info("Cluster-level QoS config updated successfully for cluster %s", cluster_id) except Exception as e: - log.exception(f"Failed to {action} cluster-level QoS for cluster {cluster_id}") + log.exception("Failed to update cluster-level QoS config for cluster %s", cluster_id) raise ErrorResponse.wrap(e) diff --git a/src/pybind/mgr/nfs/export.py b/src/pybind/mgr/nfs/export.py index a0a2c84748a3..9194f380eb83 100644 --- a/src/pybind/mgr/nfs/export.py +++ b/src/pybind/mgr/nfs/export.py @@ -953,11 +953,11 @@ class ExportMgr: clust_qos_obj = get_cluster_qos_config(cluster_id, self.mgr) clust_qos_conf = {} if clust_qos_obj: - clust_qos_conf[f'cluster_{QOSParams.enable_qos.value}'] = clust_qos_obj.enable_qos + clust_qos_conf[f'global_{QOSParams.enable_qos.value}'] = clust_qos_obj.enable_qos if clust_qos_obj.bw_obj: - clust_qos_conf[f'cluster_{QOSParams.enable_bw_ctrl.value}'] = clust_qos_obj.bw_obj.enable_bw_ctrl + clust_qos_conf[f'global_{QOSParams.enable_bw_ctrl.value}'] = clust_qos_obj.bw_obj.enable_bw_ctrl if clust_qos_obj.ops_obj: - clust_qos_conf[f'cluster_{QOSParams.enable_iops_ctrl.value}'] = clust_qos_obj.ops_obj.enable_iops_ctrl + clust_qos_conf[f'global_{QOSParams.enable_iops_ctrl.value}'] = 
clust_qos_obj.ops_obj.enable_iops_ctrl export_obj = self.get_export_obj(cluster_id, pseudo_path) if export_obj.qos_block: diff --git a/src/pybind/mgr/nfs/ganesha_conf.py b/src/pybind/mgr/nfs/ganesha_conf.py index 78be4251a341..04f97bcba552 100644 --- a/src/pybind/mgr/nfs/ganesha_conf.py +++ b/src/pybind/mgr/nfs/ganesha_conf.py @@ -391,7 +391,7 @@ class Export: if b.block_name == "CLIENT"] qos_block = [b for b in export_block.blocks - if b.block_name == "qos_block"] + if b.block_name == "QOS_BLOCK"] qos_block = QOS.from_qos_block(qos_block[0]) if qos_block else None protocols = export_block.values.get('protocols') diff --git a/src/pybind/mgr/nfs/module.py b/src/pybind/mgr/nfs/module.py index aab322205f2c..ab6369e346ea 100644 --- a/src/pybind/mgr/nfs/module.py +++ b/src/pybind/mgr/nfs/module.py @@ -12,7 +12,7 @@ from orchestrator.module import IngressType from mgr_util import CephFSEarmarkResolver from .export import ExportMgr, AppliedExportResults -from .cluster import NFSCluster, ClusterQosAction +from .cluster import NFSCluster from .utils import available_clusters from .qos_conf import QOSType, QOSBandwidthControl, UserQoSType, QOSOpsControl @@ -345,15 +345,13 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): """Disable QOS bandwidth control for NFS cluster""" return self.nfs.disable_cluster_qos_bw(cluster_id) - @CLICommand('nfs cluster cluster_qos', perm='rw') + @CLICommand('nfs cluster qos set', perm='rw') @object_format.EmptyResponder() def _cmd_nfs_cluster_global_qos(self, cluster_id: str, - action: ClusterQosAction, - msg_interval: int = 0) -> None: - """Enable or disable cluster-wide QoS. 
If disabled, QoS remains enabled, - but the configured values apply on a per-host basis""" - return self.nfs.global_cluster_qos_action(cluster_id, action.name, msg_interval) + msg_interval: int) -> None: + """Set the message interval for cluster QoS synchronization among hosts.""" + return self.nfs.cluster_qos_set_config(cluster_id, msg_interval) @CLICommand('nfs cluster qos get', perm='r') @object_format.Responder() diff --git a/src/pybind/mgr/nfs/qos_conf.py b/src/pybind/mgr/nfs/qos_conf.py index dbebc11c60a7..cb511db1e006 100644 --- a/src/pybind/mgr/nfs/qos_conf.py +++ b/src/pybind/mgr/nfs/qos_conf.py @@ -9,7 +9,6 @@ class QOSParams(Enum): clust_block = "QOS_DEFAULT_CONFIG" export_block = "QOS_BLOCK" enable_qos = "enable_qos" - enable_cluster_qos = "enable_cluster_qos" clust_qos_msg_interval = "cqos_msg_interval" qos_type = "qos_type" # bandwidth control @@ -326,7 +325,6 @@ class QOS(object): self, cluster_op: bool = False, enable_qos: bool = False, - enable_cluster_qos: Optional[bool] = None, clust_qos_msg_interval: int = 0, qos_type: Optional[QOSType] = None, bw_obj: Optional[QOSBandwidthControl] = None, @@ -334,7 +332,6 @@ class QOS(object): ) -> None: self.cluster_op = cluster_op self.enable_qos = enable_qos - self.enable_cluster_qos = enable_cluster_qos self.clust_qos_msg_interval: int = validate_clust_qos_msg_interval(clust_qos_msg_interval) self.qos_type = qos_type self.bw_obj = bw_obj @@ -348,7 +345,6 @@ class QOS(object): qos_type = qos_dict.get(QOSParams.qos_type.value) if qos_type: kwargs["qos_type"] = QOSType[qos_type] - kwargs["enable_cluster_qos"] = qos_dict.get(QOSParams.enable_cluster_qos.value) kwargs['clust_qos_msg_interval'] = qos_dict.get(QOSParams.clust_qos_msg_interval.value) kwargs["enable_qos"] = qos_dict.get(QOSParams.enable_qos.value) kwargs["bw_obj"] = QOSBandwidthControl.from_dict(qos_dict) @@ -363,7 +359,6 @@ class QOS(object): qos_type = qos_block.values.get(QOSParams.qos_type.value) if qos_type: kwargs["qos_type"] = 
QOSType(qos_type) - kwargs["enable_cluster_qos"] = qos_block.values.get(QOSParams.enable_cluster_qos.value) kwargs['clust_qos_msg_interval'] = qos_block.values.get(QOSParams.clust_qos_msg_interval.value) kwargs["enable_qos"] = qos_block.values.get(QOSParams.enable_qos.value) kwargs["bw_obj"] = QOSBandwidthControl.from_qos_block(qos_block) @@ -379,8 +374,6 @@ class QOS(object): if self.cluster_op: if self.qos_type: result.values[QOSParams.qos_type.value] = self.qos_type.value - if self.enable_cluster_qos is not None: - result.values[QOSParams.enable_cluster_qos.value] = self.enable_cluster_qos if self.clust_qos_msg_interval: result.values[QOSParams.clust_qos_msg_interval.value] = self.clust_qos_msg_interval if self.bw_obj and (res := self.bw_obj.to_qos_block()): @@ -395,8 +388,6 @@ class QOS(object): if self.cluster_op: if self.qos_type: r[QOSParams.qos_type.value] = self.qos_type.name - if self.enable_cluster_qos is not None: - r[QOSParams.enable_cluster_qos.value] = self.enable_cluster_qos if self.clust_qos_msg_interval: r[QOSParams.clust_qos_msg_interval.value] = self.clust_qos_msg_interval if self.bw_obj and (res := self.bw_obj.to_dict(ret_bw_in_bytes)): diff --git a/src/pybind/mgr/nfs/tests/test_nfs.py b/src/pybind/mgr/nfs/tests/test_nfs.py index 35e56224784a..57db70a2002e 100644 --- a/src/pybind/mgr/nfs/tests/test_nfs.py +++ b/src/pybind/mgr/nfs/tests/test_nfs.py @@ -147,7 +147,6 @@ EXPORT { qos_cluster_block = """ QOS { enable_qos = true; - enable_cluster_qos = true; enable_bw_control = true; combined_rw_bw_control = false; qos_type = "Per_Export_Per_Client"; @@ -178,7 +177,6 @@ QOS_BLOCK { qos_cluster_dict = { "enable_bw_control": True, "enable_qos": True, - "enable_cluster_qos": True, "combined_rw_bw_control": False, "max_client_read_bw": bytes_to_human(4000000, mode='binary'), "max_client_write_bw": bytes_to_human(3000000, mode='binary'), @@ -191,7 +189,6 @@ QOS_BLOCK { qos_cluster_dict_bw_in_bytes = { "enable_bw_control": True, "enable_qos": True, - 
"enable_cluster_qos": True, "combined_rw_bw_control": False, "max_client_read_bw": "4000000", "max_client_write_bw": "3000000", @@ -1488,16 +1485,12 @@ EXPORT { if not positive_tc: raise Exception("This TC was supposed to fail") out = cluster.get_cluster_qos(self.cluster_id) - expected_out = {"enable_bw_control": True, "enable_qos": True, "combined_rw_bw_control": combined_bw_ctrl, "qos_type": qos_type.name, "enable_iops_control": False, "enable_cluster_qos": True} + expected_out = {"enable_bw_control": True, "enable_qos": True, "combined_rw_bw_control": combined_bw_ctrl, "qos_type": qos_type.name, "enable_iops_control": False} for key in params: expected_out[QOSParams[key].value] = bytes_to_human(with_units_to_int(params[key]), mode='binary') assert out == expected_out - cluster.global_cluster_qos_action(self.cluster_id, 'enable', 200) - expected_out.update({'enable_cluster_qos': True, 'cqos_msg_interval': 200}) - assert cluster.get_cluster_qos(self.cluster_id) == expected_out - cluster.global_cluster_qos_action(self.cluster_id, 'disable') - expected_out.update({'enable_cluster_qos': False}) - del expected_out['cqos_msg_interval'] + cluster.cluster_qos_set_config(self.cluster_id, 200) + expected_out.update({'cqos_msg_interval': 200}) assert cluster.get_cluster_qos(self.cluster_id) == expected_out cluster.disable_cluster_qos_bw(self.cluster_id) out = cluster.get_cluster_qos(self.cluster_id) @@ -1537,7 +1530,7 @@ EXPORT { assert str(e) == 'To configure bandwidth control for export, you must first enable bandwidth control at the cluster level for foo.' 
bw_obj = QOSBandwidthControl(True, clust_combined_bw_ctrl, **clust_params) cluster.enable_cluster_qos_bw(self.cluster_id, qos_type, bw_obj) - clust_qos_conf = {'cluster_enable_qos': True, 'cluster_enable_bw_control': True, 'cluster_enable_iops_control': False} + clust_qos_conf = {'global_enable_qos': True, 'global_enable_bw_control': True, 'global_enable_iops_control': False} # set export qos try: bw_obj = QOSBandwidthControl(True, export_combined_bw_ctrl, **export_params) @@ -1596,16 +1589,13 @@ EXPORT { if not positive_tc: raise Exception("This TC was supposed to fail") out = cluster.get_cluster_qos(self.cluster_id) - expected_out = {"enable_bw_control": False, "enable_qos": True, "combined_rw_bw_control": False, "qos_type": qos_type.name, "enable_iops_control": True, "enable_cluster_qos": True} + expected_out = {"enable_bw_control": False, "enable_qos": True, "combined_rw_bw_control": False, "qos_type": qos_type.name, "enable_iops_control": True} for key in params: expected_out[QOSParams[key].value] = params[key] assert out == expected_out - cluster.global_cluster_qos_action(self.cluster_id, 'enable', 200) - expected_out.update({'enable_cluster_qos': True, 'cqos_msg_interval': 200}) + cluster.cluster_qos_set_config(self.cluster_id, 200) + expected_out.update({'cqos_msg_interval': 200}) assert cluster.get_cluster_qos(self.cluster_id) == expected_out - cluster.global_cluster_qos_action(self.cluster_id, 'disable') - expected_out.update({'enable_cluster_qos': False}) - del expected_out['cqos_msg_interval'] cluster.disable_cluster_qos_ops(self.cluster_id) out = cluster.get_cluster_qos(self.cluster_id) assert out == {"enable_bw_control": False, "enable_qos": False, "combined_rw_bw_control": False, "enable_iops_control": False} @@ -1636,7 +1626,7 @@ EXPORT { assert str(e) == 'To configure IOPS control for export, you must first enable IOPS control at the cluster level foo.' 
ops_obj = QOSOpsControl(True, **clust_params) cluster.enable_cluster_qos_ops(self.cluster_id, qos_type, ops_obj) - clust_qos_conf = {'cluster_enable_qos': True, 'cluster_enable_bw_control': False, 'cluster_enable_iops_control': True} + clust_qos_conf = {'global_enable_qos': True, 'global_enable_bw_control': False, 'global_enable_iops_control': True} # set export qos try: ops_obj = QOSOpsControl(True, **export_params) @@ -1685,7 +1675,7 @@ EXPORT { if not positive_tc: raise Exception("This TC passed but it was supposed to fail") out = cluster.get_cluster_qos(self.cluster_id) - expected_out = {"enable_bw_control": True, "enable_qos": True, "combined_rw_bw_control": False, "qos_type": ops_qos_type.name, "enable_iops_control": True, "enable_cluster_qos":True} + expected_out = {"enable_bw_control": True, "enable_qos": True, "combined_rw_bw_control": False, "qos_type": ops_qos_type.name, "enable_iops_control": True} bw_out = {} ops_out = {} for key in bw_params: @@ -1698,7 +1688,7 @@ EXPORT { # disable bandwidth control cluster.disable_cluster_qos_bw(self.cluster_id) out = cluster.get_cluster_qos(self.cluster_id) - ops_out.update({"enable_bw_control": False, "enable_qos": True, "combined_rw_bw_control": False, "enable_iops_control": True, "qos_type": ops_qos_type.name, "enable_cluster_qos": True}) + ops_out.update({"enable_bw_control": False, "enable_qos": True, "combined_rw_bw_control": False, "enable_iops_control": True, "qos_type": ops_qos_type.name}) assert out == ops_out # disable ops control cluster.disable_cluster_qos_ops(self.cluster_id) @@ -1751,7 +1741,7 @@ EXPORT { export_mgr.enable_export_qos_bw(self.cluster_id, '/cephfs_a/', bw_obj) ops_obj = QOSOpsControl(True, **export_ops_params) export_mgr.enable_export_qos_ops(self.cluster_id, '/cephfs_a/', ops_obj) - clust_qos_conf = {'cluster_enable_qos': True, 'cluster_enable_bw_control': True, 'cluster_enable_iops_control': True} + clust_qos_conf = {'global_enable_qos': True, 'global_enable_bw_control': True, 
'global_enable_iops_control': True} except Exception: req = QOS_REQ_BW_PARAMS['combined_bw_disabled'][qos_type.name] if sorted(export_bw_params.keys()) != sorted(req):