From: Gil Bregman
Date: Wed, 23 Apr 2025 20:55:24 +0000 (+0300)
Subject: mgr/cephadm/nvmeof: Allow setting NVMEoF gateway huge pages count in the spec file
X-Git-Tag: v20.3.0~11^2
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=bf1df04cf33618094128e2b3c17f4adb9ffb6f35;p=ceph.git

mgr/cephadm/nvmeof: Allow setting NVMEoF gateway huge pages count in the spec file

Fixes https://tracker.ceph.com/issues/71043

Signed-off-by: Gil Bregman
---

diff --git a/src/cephadm/cephadmlib/daemons/nvmeof.py b/src/cephadm/cephadmlib/daemons/nvmeof.py
index 51b085df2a7ee..75a05019e064d 100644
--- a/src/cephadm/cephadmlib/daemons/nvmeof.py
+++ b/src/cephadm/cephadmlib/daemons/nvmeof.py
@@ -211,13 +211,31 @@ class CephNvmeof(ContainerDaemonForm):
         return cmd.split()
 
     def get_sysctl_settings(self) -> List[str]:
-        if 'spdk_mem_size' not in self.files:
-            return [
-                'vm.nr_hugepages = 4096',
-            ]
-        else:
+        if 'spdk_mem_size' in self.files:
             return []
 
+        if 'spdk_huge_pages' in self.files:
+            try:
+                val = self.files['spdk_huge_pages']
+                huge_pages_value = int(val)
+                logger.debug(
+                    f'Found SPDK huge pages value {huge_pages_value}'
+                )
+                return [
+                    f'vm.nr_hugepages = {huge_pages_value}',
+                ]
+            except KeyError:
+                logger.exception('Failure getting SPDK huge pages value')
+            except ValueError:
+                # "val" is the raw, non-numeric value itself, not a key of self.files
+                logger.error(
+                    f'Invalid SPDK huge pages value {val}'
+                )
+
+        return [
+            'vm.nr_hugepages = 4096',
+        ]
+
     def container(self, ctx: CephadmContext) -> CephContainer:
         ctr = daemon_to_container(ctx, self)
         return to_deployment_container(ctx, ctr)
@@ -239,3 +257,11 @@ class CephNvmeof(ContainerDaemonForm):
         args.extend(['--cap-add=CAP_SYS_NICE'])
         if 'spdk_mem_size' not in self.files:
             args.extend(['--cap-add=SYS_ADMIN'])
+        if 'spdk_huge_pages' in self.files:
+            try:
+                huge_pages_value = int(self.files['spdk_huge_pages'])
+                args.extend(['-e', f'HUGEPAGES={huge_pages_value}'])
+            except KeyError:
+                pass
+            except ValueError:
+                pass
diff --git a/src/cephadm/cephadmlib/sysctl.py 
b/src/cephadm/cephadmlib/sysctl.py
index 6c9693ee96acb..4551ec20d379d 100644
--- a/src/cephadm/cephadmlib/sysctl.py
+++ b/src/cephadm/cephadmlib/sysctl.py
@@ -39,6 +39,17 @@ def install_sysctl(
     daemon_type = daemon.identity.daemon_type
     conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
 
+    for conf_file in Path(ctx.sysctl_dir).glob('90-ceph-*.conf'):
+        # Files of this cluster (any daemon type) are expected here; only
+        # files left behind by a cluster with another FSID deserve a warning.
+        if conf_file.name.startswith(f'90-ceph-{fsid}-'):
+            continue
+        logger.warning(
+            f'Found a sysctl config file for a cluster with a different FSID '
+            f'({str(conf_file)}).\nThis might obstruct the setting of new values. '
+            f'Consider deleting the file.'
+        )
+
     lines = daemon.get_sysctl_settings()
     lines = filter_sysctl_settings(ctx, lines)
 
diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py
index 277ec6ce0648a..290b31e9c8e25 100644
--- a/src/pybind/mgr/cephadm/services/nvmeof.py
+++ b/src/pybind/mgr/cephadm/services/nvmeof.py
@@ -86,6 +86,12 @@ class NvmeofService(CephService):
         # Indicate to the daemon whether to utilize huge pages
         if spec.spdk_mem_size:
             daemon_spec.extra_files['spdk_mem_size'] = str(spec.spdk_mem_size)
+        elif spec.spdk_huge_pages:
+            try:
+                huge_pages_value = int(spec.spdk_huge_pages)
+                daemon_spec.extra_files['spdk_huge_pages'] = str(huge_pages_value)
+            except ValueError:
+                logger.error(f"Invalid value for SPDK huge pages: {spec.spdk_huge_pages}")
 
         if spec.enable_auth:
             if (
diff --git a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
index 28369706edb0f..0664416bcb2c8 100644
--- a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
+++ b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
@@ -95,6 +95,10 @@ log_file_dir = {{ spec.spdk_log_file_dir }}
 conn_retries = {{ spec.conn_retries }}
 {% if spec.spdk_mem_size %}
 mem_size = {{ spec.spdk_mem_size }}
+{% else %}
+{% if spec.spdk_huge_pages %}
+spdk_huge_pages = {{ spec.spdk_huge_pages }}
+{% endif %}
 {% endif %}
 transports = {{ spec.transports }}
 {% if transport_tcp_options %}
diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py
index 0b9b2d7d2cbed..ec391e11e8241 100644
--- a/src/python-common/ceph/deployment/service_spec.py
+++ b/src/python-common/ceph/deployment/service_spec.py
@@ -1417,6 +1417,7 @@ class NvmeofServiceSpec(ServiceSpec):
                  # unused and duplicate of tgt_path below, consider removing
                  spdk_path: Optional[str] = None,
                  spdk_mem_size: Optional[int] = None,
+                 spdk_huge_pages: Optional[int] = None,
                  tgt_path: Optional[str] = None,
                  spdk_timeout: Optional[float] = 60.0,
                  spdk_log_level: Optional[str] = '',
@@ -1568,6 +1569,8 @@ class NvmeofServiceSpec(ServiceSpec):
         self.spdk_path = spdk_path or '/usr/local/bin/nvmf_tgt'
         #: ``spdk_mem_size`` memory size in MB for DPDK
         self.spdk_mem_size = spdk_mem_size
+        #: ``spdk_huge_pages`` huge pages count to be used by SPDK
+        self.spdk_huge_pages = spdk_huge_pages
         #: ``tgt_path`` nvmeof target path
         self.tgt_path = tgt_path or '/usr/local/bin/nvmf_tgt'
         #: ``spdk_timeout`` SPDK connectivity timeout
@@ -1744,6 +1747,12 @@ class NvmeofServiceSpec(ServiceSpec):
         verify_boolean(self.log_files_rotation_enabled, "Log files rotation enabled")
         verify_boolean(self.verbose_log_messages, "Verbose log messages")
         verify_boolean(self.enable_monitor_client, "Enable monitor client")
+        verify_positive_int(self.spdk_mem_size, "SPDK memory size")
+        verify_positive_int(self.spdk_huge_pages, "SPDK huge pages count")
+        if self.spdk_mem_size and self.spdk_huge_pages:
+            raise SpecValidationError(
+                '"spdk_mem_size" and "spdk_huge_pages" are mutually exclusive'
+            )
 
 
 yaml.add_representer(NvmeofServiceSpec, ServiceSpec.yaml_representer)