git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm/nvmeof: Allow setting NVMEoF gateway huge pages count in the spec file 62937/head
author: Gil Bregman <gbregman@il.ibm.com>
Wed, 23 Apr 2025 20:55:24 +0000 (23:55 +0300)
committer: Gil Bregman <gbregman@il.ibm.com>
Thu, 24 Apr 2025 21:54:59 +0000 (00:54 +0300)
Fixes: https://tracker.ceph.com/issues/71043

Signed-off-by: Gil Bregman <gbregman@il.ibm.com>
src/cephadm/cephadmlib/daemons/nvmeof.py
src/cephadm/cephadmlib/sysctl.py
src/pybind/mgr/cephadm/services/nvmeof.py
src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
src/python-common/ceph/deployment/service_spec.py

index 51b085df2a7eef785d958316f65ebb1703fee89c..75a05019e064d10e5461fddd2a91691823bba3ca 100644 (file)
@@ -211,13 +211,30 @@ class CephNvmeof(ContainerDaemonForm):
         return cmd.split()
 
     def get_sysctl_settings(self) -> List[str]:
-        if 'spdk_mem_size' not in self.files:
-            return [
-                'vm.nr_hugepages = 4096',
-            ]
-        else:
+        if 'spdk_mem_size' in self.files:
             return []
 
+        if 'spdk_huge_pages' in self.files:
+            try:
+                val = self.files['spdk_huge_pages']
+                huge_pages_value = int(val)
+                logger.debug(
+                    f'Found SPDK huge pages value {huge_pages_value}'
+                )
+                return [
+                    f'vm.nr_hugepages = {huge_pages_value}',
+                ]
+            except KeyError:
+                logger.exception('Failure getting SPDK huge pages value')
+            except ValueError:
+                logger.error(
+                    f'Invalid SPDK huge pages value {self.files[val]}'
+                )
+
+        return [
+            'vm.nr_hugepages = 4096',
+        ]
+
     def container(self, ctx: CephadmContext) -> CephContainer:
         ctr = daemon_to_container(ctx, self)
         return to_deployment_container(ctx, ctr)
@@ -239,3 +256,11 @@ class CephNvmeof(ContainerDaemonForm):
         args.extend(['--cap-add=CAP_SYS_NICE'])
         if 'spdk_mem_size' not in self.files:
             args.extend(['--cap-add=SYS_ADMIN'])
+            if 'spdk_huge_pages' in self.files:
+                try:
+                    huge_pages_value = int(self.files['spdk_huge_pages'])
+                    args.extend(['-e', f'HUGEPAGES={huge_pages_value}'])
+                except KeyError:
+                    pass
+                except ValueError:
+                    pass
index 6c9693ee96acb814b4735739e3ab25ee9ba019c9..4551ec20d379ddcb39c6f1ee793e75fa05816042 100644 (file)
@@ -39,6 +39,15 @@ def install_sysctl(
     daemon_type = daemon.identity.daemon_type
     conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
 
+    for conf_file in Path(ctx.sysctl_dir).glob('90-ceph-*.conf'):
+        if conf_file.name == f'90-ceph-{fsid}-{daemon_type}.conf':
+            continue
+        logger.warning(
+            f'Found a sysctl config file for a cluster with a different FSID '
+            f'({str(conf_file)}).\nThis might obstruct the setting of new values. '
+            f'Consider deleting the file.'
+        )
+
     lines = daemon.get_sysctl_settings()
     lines = filter_sysctl_settings(ctx, lines)
 
index 277ec6ce0648adbd0ceac4c53ad357c2d3211102..290b31e9c8e254dc514ce18a88ed39ad38b62904 100644 (file)
@@ -86,6 +86,12 @@ class NvmeofService(CephService):
         # Indicate to the daemon whether to utilize huge pages
         if spec.spdk_mem_size:
             daemon_spec.extra_files['spdk_mem_size'] = str(spec.spdk_mem_size)
+        elif spec.spdk_huge_pages:
+            try:
+                huge_pages_value = int(spec.spdk_huge_pages)
+                daemon_spec.extra_files['spdk_huge_pages'] = str(huge_pages_value)
+            except ValueError:
+                logger.error(f"Invalid value for SPDK huge pages: {spec.spdk_huge_pages}")
 
         if spec.enable_auth:
             if (
index 28369706edb0f98222ad8aabac26d1db18880e9c..0664416bcb2c8c6114ac626fce197c7b628576d7 100644 (file)
@@ -95,6 +95,10 @@ log_file_dir = {{ spec.spdk_log_file_dir }}
 conn_retries = {{ spec.conn_retries }}
 {% if spec.spdk_mem_size %}
 mem_size = {{ spec.spdk_mem_size }}
+{% else %}
+{% if spec.spdk_huge_pages %}
+spdk_huge_pages = {{ spec.spdk_huge_pages }}
+{% endif %}
 {% endif %}
 transports = {{ spec.transports }}
 {% if transport_tcp_options %}
index 0b9b2d7d2cbed15cb7fb1f3c5200d7d567b8a168..ec391e11e8241ee92a270cb9320eb46f6f4dfadf 100644 (file)
@@ -1417,6 +1417,7 @@ class NvmeofServiceSpec(ServiceSpec):
                  # unused and duplicate of tgt_path below, consider removing
                  spdk_path: Optional[str] = None,
                  spdk_mem_size: Optional[int] = None,
+                 spdk_huge_pages: Optional[int] = None,
                  tgt_path: Optional[str] = None,
                  spdk_timeout: Optional[float] = 60.0,
                  spdk_log_level: Optional[str] = '',
@@ -1568,6 +1569,8 @@ class NvmeofServiceSpec(ServiceSpec):
         self.spdk_path = spdk_path or '/usr/local/bin/nvmf_tgt'
         #: ``spdk_mem_size`` memory size in MB for DPDK
         self.spdk_mem_size = spdk_mem_size
+        #: ``spdk_huge_pages`` huge pages count to be used by SPDK
+        self.spdk_huge_pages = spdk_huge_pages
         #: ``tgt_path`` nvmeof target path
         self.tgt_path = tgt_path or '/usr/local/bin/nvmf_tgt'
         #: ``spdk_timeout`` SPDK connectivity timeout
@@ -1744,6 +1747,12 @@ class NvmeofServiceSpec(ServiceSpec):
         verify_boolean(self.log_files_rotation_enabled, "Log files rotation enabled")
         verify_boolean(self.verbose_log_messages, "Verbose log messages")
         verify_boolean(self.enable_monitor_client, "Enable monitor client")
+        verify_positive_int(self.spdk_mem_size, "SPDK memory size")
+        verify_positive_int(self.spdk_huge_pages, "SPDK huge pages count")
+        if self.spdk_mem_size and self.spdk_huge_pages:
+            raise SpecValidationError(
+                '"spdk_mem_size" and "spdk_huge_pages" are mutually exclusive'
+                )
 
 
 yaml.add_representer(NvmeofServiceSpec, ServiceSpec.yaml_representer)