]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: NFS Configuration for RDMA support
author Shweta Bhosale <Shweta.Bhosale1@ibm.com>
Sun, 1 Mar 2026 16:50:32 +0000 (22:20 +0530)
committer Shweta Bhosale <Shweta.Bhosale1@ibm.com>
Tue, 21 Apr 2026 07:44:37 +0000 (13:14 +0530)
Fixes: https://tracker.ceph.com/issues/75189
Signed-off-by: Shweta Bhosale <Shweta.Bhosale1@ibm.com>
14 files changed:
doc/cephadm/services/nfs.rst
doc/mgr/nfs.rst
src/pybind/mgr/cephadm/services/nfs.py
src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2
src/pybind/mgr/cephadm/tests/services/test_nfs.py
src/pybind/mgr/cephadm/tests/test_scheduling.py
src/pybind/mgr/nfs/cluster.py
src/pybind/mgr/nfs/export.py
src/pybind/mgr/nfs/ganesha_conf.py
src/pybind/mgr/nfs/module.py
src/pybind/mgr/nfs/tests/test_nfs.py
src/pybind/mgr/nfs/utils.py
src/python-common/ceph/deployment/service_spec.py
src/python-common/ceph/tests/test_service_spec.py

index d712ff2276d23c422863a7525a97510385291f5b..bfe83ffa31d2933316081ffb60ee388bba56105a 100644 (file)
@@ -79,6 +79,39 @@ address is not present and ``monitoring_networks`` is specified, an IP address
 that matches one of the specified networks will be used. If neither condition
 is met, the default binding will happen on all available network interfaces.
 
+NFS over RDMA
+-------------
+
+NFS over RDMA is disabled by default. To enable it, set ``enable_rdma: true`` in
+the NFS service spec. You can optionally set ``rdma_port`` to use a custom RDMA
+port; if omitted, NFS Ganesha uses its default.
+
+When RDMA is enabled:
+
+* New exports in the cluster default to **Transports = TCP, RDMA**
+* For colocation, each entry in ``colocation_ports`` must include
+  ``rdma_port`` in addition to ``data_port`` and ``monitoring_port``.
+
+Example with RDMA enabled:
+
+.. code-block:: yaml
+
+    service_type: nfs
+    service_id: mynfs
+    placement:
+      count: 1
+      hosts: [host1]
+    spec:
+      port: 2049
+      monitoring_port: 9587
+      enable_rdma: true
+      rdma_port: 20049   # optional
+
+.. note:: If you use a bind address (e.g. ``virtual_ip``, ``ip_addrs``, or
+   ``networks``) with ``enable_rdma``, ensure the network interface for that
+   address is RDMA-capable. On the host, run ``rdma link show`` and confirm the
+   netdev for the interface with the bind IP is listed.
+
 NFS Daemon Colocation
 ----------------------
 
@@ -134,8 +167,9 @@ In this configuration, 4 daemons total are deployed (2 per host), distributed ac
      ``monitoring_port`` from the spec.
    * The number of entries in ``colocation_ports`` should be ``count - 1``,
      to cover the node down scenario (or ``count_per_host - 1`` when using ``count_per_host``).
-   * Each entry must specify both ``data_port`` and ``monitoring_port``.
-   * **If ``colocation_ports`` is not specified**, ports will be automatically
+   * Each entry must specify both ``data_port`` and ``monitoring_port``. When
+     ``enable_rdma`` is true, each entry must also include ``rdma_port``.
+   * If ``colocation_ports`` is not specified, ports will be automatically
      incremented for colocated daemons (e.g., 2049 → 2050 → 2051 for data ports,
      and 9587 → 9588 → 9589 for monitoring ports).
 
index ae3626f90548bece7a4baa88f122842db5fb4879..e09378b26cdf34648d6042573c7b5d20534e384a 100644 (file)
@@ -31,7 +31,7 @@ Create NFS Ganesha Cluster
 
 .. prompt:: bash #
 
-   ceph nfs cluster create <cluster_id> [<placement>] [--ingress] [--virtual_ip <value>] [--ingress-mode {default|keepalive-only|haproxy-standard|haproxy-protocol}] [--port <int>]
+   ceph nfs cluster create <cluster_id> [<placement>] [--ingress] [--virtual_ip <value>] [--ingress-mode {default|keepalive-only|haproxy-standard|haproxy-protocol}] [--port <int>] [--enable-rdma] [--rdma_port <int>] [-i <spec_file>]
 
 This creates a common recovery pool for all NFS Ganesha daemons, new user based on
 ``cluster_id``, and a common NFS Ganesha config RADOS object.
@@ -290,7 +290,7 @@ Create CephFS Export
 
 .. prompt:: bash #
 
-   ceph nfs export create cephfs --cluster-id <cluster_id> --pseudo-path <pseudo_path> --fsname <fsname> [--readonly] [--path=/path/in/cephfs] [--client_addr <value>...] [--squash <value>] [--sectype <value>...] [--cmount_path <value>] [--xprtsec <value>]
+   ceph nfs export create cephfs --cluster-id <cluster_id> --pseudo-path <pseudo_path> --fsname <fsname> [--readonly] [--path=/path/in/cephfs] [--client_addr <value>...] [--squash <value>] [--sectype <value>...] [--cmount_path <value>] [--xprtsec <value>] [--transports <value>...]
 
 This creates export RADOS objects containing the export block, where
 
@@ -334,6 +334,12 @@ allowed to be any complete path hierarchy between ``/`` and the ``EXPORT {path}`
 .. note:: If this and the other ``EXPORT { FSAL {} }`` options are the same between multiple exports, those exports will share a single CephFS client.
           If not specified, the default is ``/``.
 
+``<transports>`` is an optional list of NFS transport protocols. Valid values are
+``TCP``, ``UDP``, and ``RDMA``. Multiple values may be passed (e.g.
+``--transports TCP --transports RDMA`` or ``--transports TCP,RDMA``). If omitted,
+the export uses the default (e.g. TCP only, or TCP and RDMA when the cluster
+has RDMA enabled).
+
 .. note:: Specifying values for sectype that require Kerberos will only function on servers
           that are configured to support Kerberos. Setting up NFS-Ganesha to support Kerberos
           can be found here `Kerberos setup for NFS Ganesha in Ceph <https://github.com/nfs-ganesha/nfs-ganesha/wiki/Kerberos-setup-for-NFS-Ganesha-in-Ceph>`_.
@@ -358,7 +364,7 @@ To export a *bucket*:
 
 .. prompt:: bash #
 
-   ceph nfs export create rgw --cluster-id <cluster_id> --pseudo-path <pseudo_path> --bucket <bucket_name> [--user-id <user-id>] [--readonly] [--client_addr <value>...] [--squash <value>] [--sectype <value>...] [--xprtsec <value>]
+   ceph nfs export create rgw --cluster-id <cluster_id> --pseudo-path <pseudo_path> --bucket <bucket_name> [--user-id <user-id>] [--readonly] [--client_addr <value>...] [--squash <value>] [--sectype <value>...] [--xprtsec <value>] [--transports <value>...]
 
 For example, to export ``mybucket`` via NFS cluster ``mynfs`` at the
 pseudo-path ``/bucketdata`` to any host in the ``192.168.10.0/24`` network
@@ -402,6 +408,10 @@ multiple values may be separated by a comma (example: ``--sectype
 krb5p,krb5i``). The server will negotatiate a supported security type with the
 client preferring the supplied methods left-to-right.
 
+``<transports>`` is optional. Valid values are ``TCP``, ``UDP``, and ``RDMA``.
+Multiple values may be passed. If omitted, defaults apply (e.g. TCP and RDMA
+when the cluster has RDMA enabled).
+
 .. note:: Specifying values for sectype that require Kerberos will only
    function on servers that are configured to support Kerberos. Setting up
    NFS-Ganesha to support Kerberos is outside the scope of this document.
@@ -417,7 +427,7 @@ To export an RGW *user*:
 
 .. prompt:: bash #
 
-   ceph nfs export create rgw --cluster-id <cluster_id> --pseudo-path <pseudo_path> --user-id <user-id> [--readonly] [--client_addr <value>...] [--squash <value>]
+   ceph nfs export create rgw --cluster-id <cluster_id> --pseudo-path <pseudo_path> --user-id <user-id> [--readonly] [--client_addr <value>...] [--squash <value>] [--transports <value>...]
 
 For example, to export *myuser* via NFS cluster *mynfs* at the pseudo-path */myuser* to any host in the ``192.168.10.0/24`` network
 
index d1e7d85bd1a8bc899111867c4360d926182ee3e1..c130c8c7c08e116594053b1dc2a41c06396aba1a 100644 (file)
@@ -139,6 +139,8 @@ class NFSService(CephService):
         deps.append(f'tls_debug: {nfs_spec.tls_debug}')
         deps.append(f'tls_min_version: {nfs_spec.tls_min_version}')
         deps.append(f'tls_ciphers: {nfs_spec.tls_ciphers}')
+        deps.append(f'enable_rdma: {nfs_spec.enable_rdma}')
+        deps.append(f'rdma_port: {nfs_spec.rdma_port}')
         return sorted(deps)
 
     def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
@@ -192,11 +194,26 @@ class NFSService(CephService):
             logger.warning(f'Bind address in {daemon_type}.{daemon_id}\'s ganesha conf is defaulting to empty')
         else:
             logger.debug("using haproxy bind address: %r", bind_addr)
+            if spec.enable_rdma:
+                logger.warning(
+                    'NFS RDMA is enabled with Bind_Addr %s on host %s. '
+                    'Ensure the network interface for this address is RDMA-capable. '
+                    "On the host, run 'rdma link show' and confirm the netdev for the interface "
+                    'with this IP is listed.',
+                    bind_addr.split('/')[0] if bind_addr else bind_addr,
+                    host,
+                )
 
         if monitoring_ip:
             daemon_spec.port_ips.update({str(monitoring_port): monitoring_ip})
 
         # generate the ganesha config
+        rdma_port = None
+        if spec.enable_rdma and daemon_spec.ports and len(daemon_spec.ports) > 2:
+            rdma_port = daemon_spec.ports[2]
+        elif spec.enable_rdma:
+            rdma_port = spec.rdma_port
+
         def get_ganesha_conf() -> str:
             context: Dict[str, Any] = {
                 "user": rados_user,
@@ -213,6 +230,8 @@ class NFSService(CephService):
                 "haproxy_hosts": [],
                 "nfs_idmap_conf": nfs_idmap_conf,
                 "enable_nlm": str(spec.enable_nlm).lower(),
+                "enable_rdma": spec.enable_rdma,
+                "rdma_port": rdma_port,
                 "cluster_id": self.mgr._cluster_fsid,
                 "tls_add": spec.ssl,
                 "tls_ciphers": spec.tls_ciphers,
index 1a3028bdcd5724070243e4b18dbf9d8145010eee..192a845fe07129806e6a9f328dcde0987b176bb3 100644 (file)
@@ -2,7 +2,11 @@
 NFS_CORE_PARAM {
         Enable_NLM = {{ enable_nlm }};
         Enable_RQUOTA = false;
+{% if enable_rdma %}
+        Protocols = 3, 4, nfsrdma, rpcrdma;
+{% else %}
         Protocols = 3, 4;
+{% endif %}
         mount_path_pseudo = true;
         Enable_UDP = false;
         NFS_Port = {{ port }};
@@ -17,6 +21,9 @@ NFS_CORE_PARAM {
        Monitoring_Addr = {{ monitoring_addr }};
 {% endif %}
         Monitoring_Port = {{ monitoring_port }};
+{% if enable_rdma and rdma_port %}
+        NFS_RDMA_Port = {{ rdma_port }};
+{% endif %}
 }
 
 NFSv4 {
index 2e15cdfa4b1143f8fb7e5f028200e42152e90660..86cc0ea8a30e47c635e7089e8e258adf59a13ae7 100644 (file)
@@ -1,10 +1,18 @@
 import contextlib
 from unittest.mock import MagicMock, patch, ANY
 
+import pytest
+
 from cephadm.services.service_registry import service_registry
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 from cephadm.module import CephadmOrchestrator
-from ceph.deployment.service_spec import NFSServiceSpec, PlacementSpec, RGWSpec, IngressSpec
+from ceph.deployment.service_spec import (
+    NFSServiceSpec,
+    PlacementSpec,
+    RGWSpec,
+    IngressSpec,
+    SpecValidationError,
+)
 from cephadm.tests.fixtures import with_host, with_service, wait, async_side_effect
 
 
@@ -478,6 +486,163 @@ class TestNFS:
                 )
                 assert expected_tls_block in ganesha_conf
 
+    @patch("cephadm.serve.CephadmServe._run_cephadm")
+    @patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock())
+    @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock())
+    @patch("cephadm.services.nfs.NFSService.purge", MagicMock())
+    @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock())
+    def test_nfs_config_rdma_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+        """NFS with enable_rdma=True: ganesha.conf has RDMA protocols (nfsrdma, rpcrdma)."""
+        _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+        with with_host(cephadm_module, 'host1', addr='1.2.3.7'):
+            nfs_spec = NFSServiceSpec(
+                service_id="foo",
+                placement=PlacementSpec(hosts=['host1']),
+                enable_rdma=True,
+            )
+            with with_service(cephadm_module, nfs_spec) as _:
+                nfs_generated_conf, _ = service_registry.get_service('nfs').generate_config(
+                    CephadmDaemonDeploySpec(
+                        host='host1',
+                        daemon_id='foo.host1.0.0',
+                        service_name=nfs_spec.service_name(),
+                        ports=[2049, 9587, 20049],
+                    ))
+                ganesha_conf = nfs_generated_conf['files']['ganesha.conf']
+                assert "Protocols = 3, 4, nfsrdma, rpcrdma" in ganesha_conf
+
+    @patch("cephadm.serve.CephadmServe._run_cephadm")
+    @patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock())
+    @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock())
+    @patch("cephadm.services.nfs.NFSService.purge", MagicMock())
+    @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock())
+    def test_nfs_config_rdma_custom_port(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+        """NFS with enable_rdma and rdma_port: ganesha.conf has NFS_RDMA_Port."""
+        _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+        with with_host(cephadm_module, 'host1', addr='1.2.3.7'):
+            nfs_spec = NFSServiceSpec(
+                service_id="foo",
+                placement=PlacementSpec(hosts=['host1']),
+                enable_rdma=True,
+                rdma_port=1234,
+            )
+            with with_service(cephadm_module, nfs_spec) as _:
+                nfs_generated_conf, _ = service_registry.get_service('nfs').generate_config(
+                    CephadmDaemonDeploySpec(
+                        host='host1',
+                        daemon_id='foo.host1.0.0',
+                        service_name=nfs_spec.service_name(),
+                        ports=[2049, 9587, 1234],
+                    ))
+                ganesha_conf = nfs_generated_conf['files']['ganesha.conf']
+                assert "Protocols = 3, 4, nfsrdma, rpcrdma" in ganesha_conf
+                assert "NFS_RDMA_Port = 1234" in ganesha_conf
+
+    @patch("cephadm.serve.CephadmServe._run_cephadm")
+    @patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock())
+    @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock())
+    @patch("cephadm.services.nfs.NFSService.purge", MagicMock())
+    @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock())
+    def test_nfs_config_rdma_disabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+        """NFS without RDMA: ganesha.conf has Protocols = 3, 4 and no NFS_RDMA_Port."""
+        _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+
+        with with_host(cephadm_module, 'host1', addr='1.2.3.7'):
+            nfs_spec = NFSServiceSpec(
+                service_id="foo",
+                placement=PlacementSpec(hosts=['host1']),
+            )
+            with with_service(cephadm_module, nfs_spec) as _:
+                nfs_generated_conf, _ = service_registry.get_service('nfs').generate_config(
+                    CephadmDaemonDeploySpec(
+                        host='host1',
+                        daemon_id='foo.host1.0.0',
+                        service_name=nfs_spec.service_name(),
+                    ))
+                ganesha_conf = nfs_generated_conf['files']['ganesha.conf']
+                assert "Protocols = 3, 4" in ganesha_conf
+                assert "nfsrdma" not in ganesha_conf
+                assert "NFS_RDMA_Port" not in ganesha_conf
+
+
+def test_nfs_colocation_ports_validation():
+    """Test validation of colocation_ports in NFSServiceSpec"""
+    # Valid case: correct number of colocation_ports (count=3, need 2 additional)
+    spec = NFSServiceSpec(
+        service_id='mynfs',
+        placement=PlacementSpec(count=3),
+        port=2049,
+        monitoring_port=9587,
+        colocation_ports=[
+            {'data_port': 3049, 'monitoring_port': 9588},
+            {'data_port': 4049, 'monitoring_port': 9589}
+        ]
+    )
+    spec.validate()  # Should not raise
+
+    # Invalid case: too few colocation_ports (count=4, need 3 additional, but only 1 provided)
+    with pytest.raises(SpecValidationError) as e:
+        spec = NFSServiceSpec(
+            service_id='mynfs',
+            placement=PlacementSpec(count=4),
+            port=2049,
+            monitoring_port=9587,
+            colocation_ports=[{'data_port': 3049, 'monitoring_port': 9588}]
+        )
+        spec.validate()
+    assert "colocation_ports requires 3 entries for count=4 (got 1)" in str(e.value)
+
+    # Invalid case: missing required field
+    with pytest.raises(SpecValidationError) as e:
+        spec = NFSServiceSpec(
+            service_id='mynfs',
+            placement=PlacementSpec(count=3),
+            port=2049,
+            monitoring_port=9587,
+            colocation_ports=[
+                {'data_port': 3049},  # Missing monitoring_port
+                {'data_port': 4049, 'monitoring_port': 9589}
+            ]
+        )
+        spec.validate()
+    assert "missing required fields: monitoring_port" in str(e.value)
+
+
+def test_nfs_colocation_ports_validation_with_rdma():
+    """Test colocation_ports with enable_rdma requires rdma_port in each entry."""
+    # Valid: enable_rdma=True, count=3, 2 colocation entries with data_port, monitoring_port, rdma_port
+    spec = NFSServiceSpec(
+        service_id='mynfs',
+        placement=PlacementSpec(count=3),
+        port=2049,
+        monitoring_port=9587,
+        enable_rdma=True,
+        rdma_port=20049,
+        colocation_ports=[
+            {'data_port': 3049, 'monitoring_port': 9588, 'rdma_port': 20050},
+            {'data_port': 4049, 'monitoring_port': 9589, 'rdma_port': 20051},
+        ]
+    )
+    spec.validate()
+
+    # Invalid: enable_rdma=True but colocation entry missing rdma_port
+    with pytest.raises(SpecValidationError) as e:
+        spec = NFSServiceSpec(
+            service_id='mynfs',
+            placement=PlacementSpec(count=3),
+            port=2049,
+            monitoring_port=9587,
+            enable_rdma=True,
+            colocation_ports=[
+                {'data_port': 3049, 'monitoring_port': 9588},  # missing rdma_port
+                {'data_port': 4049, 'monitoring_port': 9589, 'rdma_port': 20051},
+            ]
+        )
+        spec.validate()
+    assert "missing required fields: rdma_port" in str(e.value)
+
 
 @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock())
 @patch("cephadm.services.nfs.NFSService.purge", MagicMock())
@@ -505,7 +670,6 @@ def test_nfs_choose_next_action(cephadm_module, mock_cephadm):
         # dependencies are prefixed with 'kmip' but I can't find any code
         # that would produce any dependencies prefixed with 'kmip'!
 
-
 @patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock())
 @patch("cephadm.services.nfs.NFSService.purge", MagicMock())
 @patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock())
index a6d155af3ec751f887fc6c78db0cd3311ce6f4c3..e513bdeeaff0ff1e93118ff0573ee06779e0ca7c 100644 (file)
@@ -1328,50 +1328,6 @@ def test_bad_specs(service_type, placement, hosts, daemons, expected):
     assert str(e.value) == expected
 
 
-def test_nfs_colocation_ports_validation():
-    """Test validation of colocation_ports in NFSServiceSpec"""
-    from ceph.deployment.service_spec import SpecValidationError
-    # Valid case: correct number of colocation_ports (count=3, need 2 additional)
-    spec = NFSServiceSpec(
-        service_id='mynfs',
-        placement=PlacementSpec(count=3),
-        port=2049,
-        monitoring_port=9587,
-        colocation_ports=[
-            {'data_port': 3049, 'monitoring_port': 9588},
-            {'data_port': 4049, 'monitoring_port': 9589}
-        ]
-    )
-    spec.validate()  # Should not raise
-
-    # Invalid case: too few colocation_ports (count=4, need 3 additional, but only 1 provided)
-    with pytest.raises(SpecValidationError) as e:
-        spec = NFSServiceSpec(
-            service_id='mynfs',
-            placement=PlacementSpec(count=4),
-            port=2049,
-            monitoring_port=9587,
-            colocation_ports=[{'data_port': 3049, 'monitoring_port': 9588}]
-        )
-        spec.validate()
-    assert "colocation_ports requires 3 entries for count=4 (got 1)" in str(e.value)
-
-    # Invalid case: missing required field
-    with pytest.raises(SpecValidationError) as e:
-        spec = NFSServiceSpec(
-            service_id='mynfs',
-            placement=PlacementSpec(count=3),
-            port=2049,
-            monitoring_port=9587,
-            colocation_ports=[
-                {'data_port': 3049},  # Missing monitoring_port
-                {'data_port': 4049, 'monitoring_port': 9589}
-            ]
-        )
-        spec.validate()
-    assert "missing required fields: monitoring_port" in str(e.value)
-
-
 class ActiveAssignmentTest(NamedTuple):
     service_type: str
     placement: PlacementSpec
index e791d5f050589a875d39a317189095d0ae698d77..1b3ce8213f558d4787fde4d758dfafb2f58d4a0f 100644 (file)
@@ -73,6 +73,8 @@ class NFSCluster:
             tls_debug: bool = False,
             tls_min_version: Optional[str] = None,
             tls_ciphers: Optional[str] = None,
+            enable_rdma: bool = False,
+            rdma_port: Optional[int] = None,
     ) -> None:
         if not port:
             port = 2049   # default nfs port
@@ -114,7 +116,9 @@ class NFSCluster:
                                   tls_ktls=tls_ktls,
                                   tls_debug=tls_debug,
                                   tls_min_version=tls_min_version,
-                                  tls_ciphers=tls_ciphers)
+                                  tls_ciphers=tls_ciphers,
+                                  enable_rdma=enable_rdma,
+                                  rdma_port=rdma_port)
             completion = self.mgr.apply_nfs(spec)
             orchestrator.raise_if_exception(completion)
             ispec = IngressSpec(service_type='ingress',
@@ -140,7 +144,9 @@ class NFSCluster:
                                   tls_ktls=tls_ktls,
                                   tls_debug=tls_debug,
                                   tls_min_version=tls_min_version,
-                                  tls_ciphers=tls_ciphers)
+                                  tls_ciphers=tls_ciphers,
+                                  enable_rdma=enable_rdma,
+                                  rdma_port=rdma_port)
             completion = self.mgr.apply_nfs(spec)
             orchestrator.raise_if_exception(completion)
         log.debug("Successfully deployed nfs daemons with cluster id %s and placement %s",
@@ -172,6 +178,8 @@ class NFSCluster:
             tls_debug: bool = False,
             tls_min_version: Optional[str] = None,
             tls_ciphers: Optional[str] = None,
+            enable_rdma: bool = False,
+            rdma_port: Optional[int] = None,
     ) -> None:
         try:
             if virtual_ip:
@@ -197,7 +205,7 @@ class NFSCluster:
             if cluster_id not in available_clusters(self.mgr):
                 self._call_orch_apply_nfs(cluster_id, placement, virtual_ip, ingress_mode, port,
                                           ssl, ssl_cert, ssl_key, ssl_ca_cert, tls_ktls, tls_debug,
-                                          tls_min_version, tls_ciphers)
+                                          tls_min_version, tls_ciphers, enable_rdma, rdma_port)
                 return
             raise NonFatalError(f"{cluster_id} cluster already exists")
         except Exception as e:
index 172012ed62e406017e99f7592756bd5a67106692..bdd3365c8b1927627940abf2904019fa7506bf6d 100644 (file)
@@ -38,7 +38,9 @@ from .utils import (
     conf_obj_name,
     available_clusters,
     check_fs,
-    restart_nfs_service, cephfs_path_is_dir)
+    get_nfs_spec_for_cluster,
+    restart_nfs_service,
+    cephfs_path_is_dir)
 
 if TYPE_CHECKING:
     from nfs.module import Module
@@ -722,6 +724,11 @@ class ExportMgr:
 
         ex_dict["fsal"] = fsal
         ex_dict["cluster_id"] = cluster_id
+        # When RDMA is enabled at cluster level, default export transports to TCP, RDMA
+        if "transports" not in ex_dict:
+            nfs_spec = get_nfs_spec_for_cluster(self.mgr, cluster_id)
+            if nfs_spec and getattr(nfs_spec, "enable_rdma", False):
+                ex_dict["transports"] = ["TCP", "RDMA"]
         export = Export.from_dict(ex_id, ex_dict)
         if export.fsal.name == NFS_GANESHA_SUPPORTED_FSALS[0]:
             self._ensure_cephfs_export_user(export)
@@ -742,6 +749,7 @@ class ExportMgr:
                              sectype: Optional[List[str]] = None,
                              xprtsec: Optional[str] = None,
                              cmount_path: Optional[str] = "/",
+                             transports: Optional[List[str]] = None,
                              earmark_resolver: Optional[CephFSEarmarkResolver] = None
                              ) -> Dict[str, Any]:
 
@@ -751,24 +759,27 @@ class ExportMgr:
 
         pseudo_path = normalize_path(pseudo_path)
 
+        export_dict = {
+            "pseudo": pseudo_path,
+            "path": path,
+            "access_type": access_type,
+            "squash": squash,
+            "fsal": {
+                "name": NFS_GANESHA_SUPPORTED_FSALS[0],
+                "cmount_path": cmount_path,
+                "fs_name": fs_name,
+            },
+            "clients": clients,
+            "sectype": sectype,
+            "XprtSec": xprtsec,
+        }
+        if transports is not None:
+            export_dict["transports"] = transports
         if not self._fetch_export(cluster_id, pseudo_path):
             export = self.create_export_from_dict(
                 cluster_id,
                 self._gen_export_id(cluster_id),
-                {
-                    "pseudo": pseudo_path,
-                    "path": path,
-                    "access_type": access_type,
-                    "squash": squash,
-                    "fsal": {
-                        "name": NFS_GANESHA_SUPPORTED_FSALS[0],
-                        "cmount_path": cmount_path,
-                        "fs_name": fs_name,
-                    },
-                    "clients": clients,
-                    "sectype": sectype,
-                    "XprtSec": xprtsec,
-                },
+                export_dict,
                 earmark_resolver
             )
             log.debug("creating cephfs export %s", export)
@@ -794,29 +805,33 @@ class ExportMgr:
                           user_id: Optional[str] = None,
                           clients: list = [],
                           sectype: Optional[List[str]] = None,
-                          xprtsec: Optional[str] = None) -> Dict[str, Any]:
+                          xprtsec: Optional[str] = None,
+                          transports: Optional[List[str]] = None) -> Dict[str, Any]:
         pseudo_path = normalize_path(pseudo_path)
 
         if not bucket and not user_id:
             raise ErrorResponse("Must specify either bucket or user_id")
 
+        export_dict = {
+            "pseudo": pseudo_path,
+            "path": bucket or '/',
+            "access_type": access_type,
+            "squash": squash,
+            "fsal": {
+                "name": NFS_GANESHA_SUPPORTED_FSALS[1],
+                "user_id": user_id,
+            },
+            "clients": clients,
+            "sectype": sectype,
+            "XprtSec": xprtsec,
+        }
+        if transports is not None:
+            export_dict["transports"] = transports
         if not self._fetch_export(cluster_id, pseudo_path):
             export = self.create_export_from_dict(
                 cluster_id,
                 self._gen_export_id(cluster_id),
-                {
-                    "pseudo": pseudo_path,
-                    "path": bucket or '/',
-                    "access_type": access_type,
-                    "squash": squash,
-                    "fsal": {
-                        "name": NFS_GANESHA_SUPPORTED_FSALS[1],
-                        "user_id": user_id,
-                    },
-                    "clients": clients,
-                    "sectype": sectype,
-                    "XprtSec": xprtsec,
-                }
+                export_dict
             )
             log.debug("creating rgw export %s", export)
             self._create_rgw_export_user(export)
index c4072a01d331d603d5d2c9fb0125e4696a2a30de..759b4dcdc98d7fa28198db2b6d1f0b6ef87b750f 100644 (file)
@@ -523,7 +523,7 @@ class Export:
             if p not in [3, 4]:
                 raise NFSInvalidOperation(f"Invalid protocol {p}")
 
-        valid_transport = ["UDP", "TCP"]
+        valid_transport = ["UDP", "TCP", "RDMA"]
         for trans in self.transports:
             if trans.upper() not in valid_transport:
                 raise NFSInvalidOperation(f'{trans} is not a valid transport protocol')
index ef9860dfffeeb67559c1f7ab321f101bde683399..762a2fab9c5ab460df06f083ca35323860723161 100644 (file)
@@ -44,7 +44,8 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule):
             squash: str = 'none',
             sectype: Optional[List[str]] = None,
             xprtsec: Optional[str] = None,
-            cmount_path: Optional[str] = "/"
+            cmount_path: Optional[str] = "/",
+            transports: Optional[List[str]] = None
     ) -> Dict[str, Any]:
         """Create a CephFS export"""
         earmark_resolver = CephFSEarmarkResolver(self)
@@ -60,6 +61,7 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule):
             sectype=sectype,
             xprtsec=xprtsec,
             cmount_path=cmount_path,
+            transports=transports,
             earmark_resolver=earmark_resolver
         )
 
@@ -76,6 +78,7 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule):
             squash: str = 'none',
             sectype: Optional[List[str]] = None,
             xprtsec: Optional[str] = None,
+            transports: Optional[List[str]] = None
     ) -> Dict[str, Any]:
         """Create an RGW export"""
         return self.export_mgr.create_export(
@@ -88,7 +91,8 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule):
             squash=squash,
             addr=client_addr,
             sectype=sectype,
-            xprtsec=xprtsec
+            xprtsec=xprtsec,
+            transports=transports,
         )
 
     @NFSCLICommand('nfs export rm', perm='rw')
@@ -139,6 +143,8 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule):
                                 virtual_ip: Optional[str] = None,
                                 ingress_mode: Optional[IngressType] = None,
                                 port: Optional[int] = None,
+                                enable_rdma: bool = False,
+                                rdma_port: Optional[int] = None,
                                 inbuf: Optional[str] = None) -> None:
         """Create an NFS Cluster"""
         ssl_cert = ssl_key = ssl_ca_cert = tls_min_version = tls_ciphers = None
@@ -164,7 +170,9 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule):
                                            tls_ktls=tls_ktls,
                                            tls_debug=tls_debug,
                                            tls_min_version=tls_min_version,
-                                           tls_ciphers=tls_ciphers)
+                                           tls_ciphers=tls_ciphers,
+                                           enable_rdma=enable_rdma,
+                                           rdma_port=rdma_port)
 
     @NFSCLICommand('nfs cluster rm', perm='rw')
     @object_format.EmptyResponder()
index edcbc49df94b4424ed874dc7c057fd42d3c1f28d..cb9587e6ad1ee66f320016a02ccc204ce00402ef 100644 (file)
@@ -1166,6 +1166,99 @@ NFS_CORE_PARAM {
         assert export.clients[0].access_type == 'rw'
         assert export.clients[0].addresses == ["192.168.1.0/8"]
         assert export.cluster_id == self.cluster_id
+
+    def test_create_export_default_transports_rdma_cluster(self):
+        """When cluster has enable_rdma=True, new exports get default Transports = TCP, RDMA."""
+        self._do_mock_test(self._do_test_create_export_default_transports_rdma_cluster)
+
+    def _do_test_create_export_default_transports_rdma_cluster(self):
+        """Create an RGW export without explicit transports on an RDMA-enabled cluster."""
+        export_mgr = ExportMgr(Module('nfs', '', ''))
+        # The spec lookup is patched to return a spec with enable_rdma=True.
+        spec_with_rdma = NFSServiceSpec(service_id=self.cluster_id, enable_rdma=True)
+        create_kwargs = dict(
+            fsal_type='rgw',
+            cluster_id=self.cluster_id,
+            bucket='rdmabucket',
+            pseudo_path='/rdmabucket',
+            read_only=False,
+            squash='root',
+            addr=["192.168.0.0/16"],
+        )
+        with mock.patch('nfs.export.get_nfs_spec_for_cluster', return_value=spec_with_rdma):
+            result = export_mgr.create_export(**create_kwargs)
+        assert result["bind"] == "/rdmabucket"
+        created = export_mgr._fetch_export(self.cluster_id, '/rdmabucket')
+        assert created is not None
+        assert sorted(created.transports) == ["RDMA", "TCP"]
+
+    def test_export_transport_rdma_valid(self):
+        """An export block listing both TCP and RDMA transports parses and validates."""
+        export_block = """
+EXPORT {
+    export_id = 1;
+    path = "/";
+    pseudo = "/rdma_export";
+    access_type = "RW";
+    squash = "none";
+    protocols = 4;
+    transports = "TCP", "RDMA";
+    FSAL {
+        name = "CEPH";
+        filesystem = "a";
+        cmount_path = "/";
+    }
+}
+"""
+        parsed_blocks = GaneshaConfParser(export_block).parse()
+        rdma_export = Export.from_export_block(parsed_blocks[0], self.cluster_id)
+        assert set(rdma_export.transports) == {"TCP", "RDMA"}
+        # validate() is expected not to raise; both check_fs lookups are stubbed to True.
+        export_mgr = ExportMgr(Module('nfs', '', ''))
+        with mock.patch('nfs.export.check_fs', return_value=True):
+            with mock.patch('nfs.ganesha_conf.check_fs', return_value=True):
+                rdma_export.validate(export_mgr.mgr)
+
+    def test_update_export_without_transport_rdma_cluster(self):
+        """Updating an export without a 'transports' field on an RDMA-enabled
+        cluster yields Transports = TCP, RDMA."""
+        self._do_mock_test(self._do_test_update_export_without_transport_rdma_cluster)
+
+    def _do_test_update_export_without_transport_rdma_cluster(self):
+        """Re-apply the /rgw export without 'transports' while the cluster spec has RDMA on."""
+        export_mgr = ExportMgr(Module('nfs', '', ''))
+        # Existing export at /rgw has Transports = TCP, UDP (from export_2)
+        original = export_mgr._fetch_export(self.cluster_id, '/rgw')
+        assert original is not None
+        assert set(original.transports) == {"TCP", "UDP"}
+
+        # The payload deliberately carries no 'transports' key.
+        update_payload = {
+            'export_id': 2,
+            'path': '/',
+            'pseudo': '/rgw',
+            'cluster_id': self.cluster_id,
+            'access_type': 'RO',
+            'squash': 'root',
+            'security_label': False,
+            'protocols': [4, 3],
+            'clients': [],
+            'fsal': {
+                'name': 'RGW',
+                'user_id': 'nfs.foo.bucket',
+                'access_key_id': 'the_access_key',
+                'secret_access_key': 'the_secret_key',
+            },
+        }
+        spec_with_rdma = NFSServiceSpec(service_id=self.cluster_id, enable_rdma=True)
+        with mock.patch('nfs.export.get_nfs_spec_for_cluster', return_value=spec_with_rdma):
+            applied = export_mgr.apply_export(self.cluster_id, json.dumps(update_payload))
+        assert len(applied.changes) == 1
+
+        updated = export_mgr._fetch_export(self.cluster_id, '/rgw')
+        assert updated is not None
+        assert updated.export_id == 2
+        assert updated.access_type == 'RO'
+        assert updated.squash == 'root'
+        # Omitted transports plus an RDMA-enabled cluster spec gives TCP, RDMA.
+        assert set(updated.transports) == {'TCP', 'RDMA'}
     
     def _do_test_create_export_cephfs_with_cmount_path(self):
         nfs_mod = Module('nfs', '', '')
index ff5324228b566a6a6df83dc871b6fd940cf457db..fe928007a6d9cb103cc695f7c07d75f74d8a8983 100644 (file)
@@ -1,7 +1,7 @@
 import functools
 import logging
 import stat
-from typing import List, Tuple, TYPE_CHECKING
+from typing import List, Optional, Tuple, Any, TYPE_CHECKING
 
 from object_format import ErrorResponseBase
 import orchestrator
@@ -81,6 +81,22 @@ def available_clusters(mgr: 'Module') -> List[str]:
             if cluster.spec.service_id]
 
 
+def get_nfs_spec_for_cluster(mgr: 'Module', cluster_id: str) -> Optional[Any]:
+    """Return the NFS service spec for the given cluster_id, or None if not found.
+
+    The lookup is best-effort: it asks the orchestrator for all 'nfs'
+    services and returns the spec whose ``service_id`` equals ``cluster_id``.
+    A missing orchestrator, or any other failure while querying it, is
+    logged at debug level and reported to the caller as ``None``.
+    """
+    try:
+        completion = mgr.describe_service(service_type='nfs')
+        orchestrator.raise_if_exception(completion)
+        # An empty or missing result simply means there is nothing to match.
+        for svc in completion.result or []:
+            if getattr(svc.spec, 'service_id', None) == cluster_id:
+                return svc.spec
+    except NoOrchestrator:
+        log.debug("No orchestrator configured")
+    except Exception:
+        # Stay best-effort, but keep the traceback so the failure can be diagnosed.
+        log.debug("Failed to get NFS spec for cluster %s", cluster_id, exc_info=True)
+    return None
+
+
 def nfs_rados_configs(rados: 'Rados', nfs_pool: str = POOL_NAME) -> List[str]:
     """Return a list of all the namespaces in the nfs_pool where nfs
     configuration objects are found. The namespaces also correspond
index 7ecc907d0bc50dde5eee548fac754c3fe8c32ebf..0d52a9bc7ffbfb7c42f49eb813001eb33e71cb67 100644 (file)
@@ -1351,6 +1351,7 @@ yaml.add_representer(ServiceSpec, ServiceSpec.yaml_representer)
 
 class NFSServiceSpec(ServiceSpec):
     COLOCATION_PORT_FIELDS = ['data_port', 'monitoring_port']
+    COLOCATION_PORT_FIELDS_WITH_RDMA = ['data_port', 'monitoring_port', 'rdma_port']
 
     def __init__(self,
                  service_type: str = 'nfs',
@@ -1368,6 +1369,8 @@ class NFSServiceSpec(ServiceSpec):
                  virtual_ip: Optional[str] = None,
                  enable_nlm: bool = False,
                  enable_haproxy_protocol: bool = False,
+                 enable_rdma: bool = False,
+                 rdma_port: Optional[int] = None,
                  extra_container_args: Optional[GeneralArgList] = None,
                  extra_entrypoint_args: Optional[GeneralArgList] = None,
                  idmap_conf: Optional[Dict[str, Dict[str, str]]] = None,
@@ -1407,6 +1410,8 @@ class NFSServiceSpec(ServiceSpec):
         self.enable_haproxy_protocol = enable_haproxy_protocol
         self.idmap_conf = idmap_conf
         self.enable_nlm = enable_nlm
+        self.enable_rdma = enable_rdma
+        self.rdma_port = rdma_port
 
         # colocation_ports is a list of port dicts for ADDITIONAL colocated daemons
         # The first daemon always uses port and monitoring_port from the spec
@@ -1419,8 +1424,17 @@ class NFSServiceSpec(ServiceSpec):
         self.tls_debug = tls_debug
         self.tls_min_version = tls_min_version
 
+    def get_colocation_port_fields(self) -> List[str]:
+        """Pick the colocation port field names, adding rdma_port when RDMA is enabled."""
+        return (self.COLOCATION_PORT_FIELDS_WITH_RDMA if self.enable_rdma
+                else self.COLOCATION_PORT_FIELDS)
+
     def get_port_start(self) -> List[int]:
-        return [self.port or 2049, self.monitoring_port or 9587]
+        ports = [self.port or 2049, self.monitoring_port or 9587]
+        if self.enable_rdma:
+            ports.append(self.rdma_port or 20049)
+        return ports
 
     def get_colocation_ports_list(self) -> List[List[int]]:
         """
@@ -1429,7 +1443,8 @@ class NFSServiceSpec(ServiceSpec):
         """
         if not self.colocation_ports:
             return []
-        return [[port_dict[field] for field in self.COLOCATION_PORT_FIELDS]
+        fields = self.get_colocation_port_fields()
+        return [[port_dict[field] for field in fields]
                 for port_dict in self.colocation_ports]
 
     def rados_config_name(self):
@@ -1460,16 +1475,17 @@ class NFSServiceSpec(ServiceSpec):
                         "ports, remaining need custom ports."
                     )
         # Validate that each entry has the required port fields
+        fields = self.get_colocation_port_fields()
         for idx, port_dict in enumerate(self.colocation_ports):
             if not isinstance(port_dict, dict):
                 raise SpecValidationError(
                     f"colocation_ports[{idx}] must be a dict with "
-                    f"fields: {', '.join(self.COLOCATION_PORT_FIELDS)}"
+                    f"fields: {', '.join(fields)}"
                 )
-            missing = [f for f in self.COLOCATION_PORT_FIELDS if f not in port_dict]
+            missing = [f for f in fields if f not in port_dict]
             if missing:
                 missing_str = ', '.join(missing)
-                format_str = ', '.join(f'{f!r}: <port>' for f in self.COLOCATION_PORT_FIELDS)
+                format_str = ', '.join(f'{f!r}: <port>' for f in fields)
                 raise SpecValidationError(
                     f"Invalid NFS spec: colocation_ports[{idx}] missing required "
                     f"fields: {missing_str}. Expected format: {{{format_str}}}"
index 559d082340ece406fec5711b2e797866abc657c2..f497f3df222dedcd0e02625896e85f31bca0a059 100644 (file)
@@ -552,6 +552,61 @@ def test_alertmanager_spec_2():
     assert 'default_webhook_urls' in spec.user_data.keys()
 
 
+def test_nfs_spec_rdma_default():
+    """With no RDMA options the spec has RDMA off and exposes only the two base ports."""
+    placement = PlacementSpec(count=1)
+    nfs_spec = NFSServiceSpec(service_id='mynfs', placement=placement)
+    assert nfs_spec.enable_rdma is False
+    assert nfs_spec.rdma_port is None
+    # Neither the start ports nor the colocation fields mention RDMA.
+    assert nfs_spec.get_port_start() == [2049, 9587]
+    assert nfs_spec.get_colocation_port_fields() == ['data_port', 'monitoring_port']
+
+
+def test_nfs_spec_rdma_enabled():
+    """enable_rdma adds a third start port (20049 by default) and the rdma_port field."""
+    placement = PlacementSpec(count=1)
+    nfs_spec = NFSServiceSpec(service_id='mynfs', placement=placement, enable_rdma=True)
+    assert nfs_spec.enable_rdma is True
+    # rdma_port itself stays unset; the default only shows up in get_port_start().
+    assert nfs_spec.rdma_port is None
+    assert nfs_spec.get_port_start() == [2049, 9587, 20049]
+    expected_fields = ['data_port', 'monitoring_port', 'rdma_port']
+    assert nfs_spec.get_colocation_port_fields() == expected_fields
+
+
+def test_nfs_spec_rdma_custom_port():
+    """Explicit port, monitoring_port and rdma_port are all returned by get_port_start."""
+    spec_kwargs = dict(
+        service_id='mynfs',
+        placement=PlacementSpec(count=1),
+        port=3049,
+        monitoring_port=9588,
+        enable_rdma=True,
+        rdma_port=20050,
+    )
+    nfs_spec = NFSServiceSpec(**spec_kwargs)
+    assert nfs_spec.enable_rdma is True
+    assert nfs_spec.rdma_port == 20050
+    assert nfs_spec.get_port_start() == [3049, 9588, 20050]
+
+
+def test_nfs_spec_from_json_rdma():
+    """enable_rdma and rdma_port survive a from_json/to_json roundtrip."""
+    rdma_options = {
+        'enable_rdma': True,
+        'rdma_port': 1234,
+    }
+    data = {
+        'service_id': 'mynfs',
+        'service_type': 'nfs',
+        'placement': {'count': 1},
+        'spec': rdma_options,
+    }
+    nfs_spec = NFSServiceSpec.from_json(data)
+    assert nfs_spec.enable_rdma is True
+    assert nfs_spec.rdma_port == 1234
+    # The serialized form carries both values under its 'spec' section.
+    out = nfs_spec.to_json()
+    assert out.get('spec', {}).get('enable_rdma') is True
+    assert out.get('spec', {}).get('rdma_port') == 1234
+
 
 def test_repr():
     val = """ServiceSpec.from_json(yaml.safe_load('''service_type: crash