self.networks[host] = nets
self.last_network_update[host] = datetime_now()
+ def get_interface_for_ip(self, host: str, ip: str) -> Optional[str]:
+ """Return the network interface name that has the given IP on host, or None."""
+ for _subnet, ifaces in self.networks.get(host, {}).items():
+ for iface, ips in ifaces.items():
+ if ip in ips:
+ return iface
+ return None
+
def update_daemon_config_deps(self, host: str, name: str, deps: List[str], stamp: datetime.datetime) -> None:
self.daemon_config_deps[host][name] = {
'deps': deps,
self.mgr.cache.save_host(host)
return None
+ async def get_rdma_devices(self, host: str) -> List[Dict[str, Any]]:
+ """Return list of RDMA devices on host from cephadm list-rdma, or [] on error."""
+ try:
+ out = await self._run_cephadm_json(
+ host, 'mon', 'list-rdma', [], no_fsid=True,
+ log_output=self.mgr.log_refresh_metadata)
+ return out if isinstance(out, list) else []
+ except OrchestratorError as e:
+ self.log.error('Failed to get RDMA devices for host %s: %s', host, e)
+ return []
+
def _refresh_host_osdspec_previews(self, host: str) -> Optional[str]:
self.update_osdspec_previews(host)
self.mgr.cache.save_host(host)
logger.warning(f'Bind address in {daemon_type}.{daemon_id}\'s ganesha conf is defaulting to empty')
else:
logger.debug("using haproxy bind address: %r", bind_addr)
- if spec.enable_rdma:
- logger.warning(
- 'NFS RDMA is enabled with Bind_Addr %s on host %s. '
- 'Ensure the network interface for this address is RDMA-capable. '
- "On the host, run 'rdma link show' and confirm the netdev for the interface "
- 'with this IP is listed.',
- bind_addr.split('/')[0] if bind_addr else bind_addr,
- host,
+
+ if spec.enable_rdma:
+ from cephadm.serve import CephadmServe
+ rdma_devices = self.mgr.wait_async(
+ CephadmServe(self.mgr).get_rdma_devices(host))
+ if not rdma_devices:
+ raise OrchestratorError(
+ f'NFS RDMA is enabled but host {host} has no RDMA devices. '
+ "Run 'cephadm list-rdma' on the host to verify RDMA is available."
)
+ if bind_addr:
+ bind_ip = bind_addr.split('/')[0]
+ iface = self.mgr.cache.get_interface_for_ip(host, bind_ip)
+ if iface:
+ rdma_netdevs = {d.get('netdev', '') for d in rdma_devices}
+ if iface not in rdma_netdevs:
+ raise OrchestratorError(
+ f'NFS RDMA is enabled with bind address {bind_addr} on host {host}, '
+ f'but interface {iface} (for this IP) is not RDMA-capable. '
+ f'RDMA netdevs on host: {sorted(rdma_netdevs)}. '
+ "Use an IP on an RDMA-capable interface or run 'rdma link show' on the host."
+ )
if monitoring_ip:
daemon_spec.port_ips.update({str(monitoring_port): monitoring_ip})
'user': rgw_user,
'keyring': rgw_keyring,
}
+ config['enable_rdma'] = spec.enable_rdma
logger.debug('Generated cephadm config-json: %s' % config)
return config
)
assert expected_tls_block in ganesha_conf
+ @patch("cephadm.serve.CephadmServe._run_cephadm_json")
@patch("cephadm.serve.CephadmServe._run_cephadm")
@patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock())
@patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock())
@patch("cephadm.services.nfs.NFSService.purge", MagicMock())
@patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock())
- def test_nfs_config_rdma_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ def test_nfs_config_rdma_enabled(self, _run_cephadm, _run_cephadm_json, cephadm_module: CephadmOrchestrator):
"""NFS with enable_rdma=True: ganesha.conf has RDMA protocols (nfsrdma, rpcrdma)."""
_run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ # Stub _run_cephadm_json: one RDMA device for 'list-rdma', [] for 'ls', and {} for every other command (so .get() still works on the result)
+
+ async def mock_list_rdma(host, entity, command, *args, **kwargs):
+ if command == 'list-rdma':
+ return [{'link': 'rdma0/1', 'state': 'ACTIVE',
+ 'physical_state': 'LINK_UP', 'netdev': 'eth0'}]
+ if command == 'ls':
+ return []
+ return {}
+ _run_cephadm_json.side_effect = mock_list_rdma
with with_host(cephadm_module, 'host1', addr='1.2.3.7'):
nfs_spec = NFSServiceSpec(
ganesha_conf = nfs_generated_conf['files']['ganesha.conf']
assert "Protocols = 3, 4, nfsrdma, rpcrdma" in ganesha_conf
+ @patch("cephadm.serve.CephadmServe._run_cephadm_json")
@patch("cephadm.serve.CephadmServe._run_cephadm")
@patch("cephadm.services.nfs.NFSService.fence_old_ranks", MagicMock())
@patch("cephadm.services.nfs.NFSService.run_grace_tool", MagicMock())
@patch("cephadm.services.nfs.NFSService.purge", MagicMock())
@patch("cephadm.services.nfs.NFSService.create_rados_config_obj", MagicMock())
- def test_nfs_config_rdma_custom_port(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ def test_nfs_config_rdma_custom_port(self, _run_cephadm, _run_cephadm_json, cephadm_module: CephadmOrchestrator):
"""NFS with enable_rdma and rdma_port: ganesha.conf has NFS_RDMA_Port."""
_run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ # Stub _run_cephadm_json: one RDMA device for 'list-rdma', [] for 'ls', and {} for every other command (so .get() still works on the result)
+
+ async def mock_list_rdma(host, entity, command, *args, **kwargs):
+ if command == 'list-rdma':
+ return [{'link': 'rdma0/1', 'state': 'ACTIVE',
+ 'physical_state': 'LINK_UP', 'netdev': 'eth0'}]
+ if command == 'ls':
+ return []
+ return {}
+ _run_cephadm_json.side_effect = mock_list_rdma
with with_host(cephadm_module, 'host1', addr='1.2.3.7'):
nfs_spec = NFSServiceSpec(