From: Shweta Bhosale Date: Thu, 22 Jan 2026 10:09:41 +0000 (+0530) Subject: mgr/cephadm: Allow colocation for NFS daemon to support active-active mode, Spec... X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4848297c6576b4c8b0c13d546688f16fba4e5573;p=ceph-ci.git mgr/cephadm: Allow colocation for NFS daemon to support active-active mode, Spec will have colocation_ports field to accept ports for colocating daemons Fixes: https://tracker.ceph.com/issues/74479 Signed-off-by: Shweta Bhosale --- diff --git a/doc/cephadm/services/nfs.rst b/doc/cephadm/services/nfs.rst index f7b2caabc37..55116dca24b 100644 --- a/doc/cephadm/services/nfs.rst +++ b/doc/cephadm/services/nfs.rst @@ -79,6 +79,94 @@ address is not present and ``monitoring_networks`` is specified, an IP address that matches one of the specified networks will be used. If neither condition is met, the default binding will happen on all available network interfaces. +NFS Daemon Colocation +---------------------- + +By default, cephadm avoids placing multiple NFS daemons on the same host. However, +you can enable colocation to deploy multiple NFS daemons on the same host for +increased capacity or redundancy. + +.. note:: + When a host becomes unavailable, cephadm will automatically redeploy the + affected NFS daemons on the remaining available hosts to maintain the desired + ``count``. This may result in multiple daemons running on the same host, + even if colocation was not explicitly configured. The system ensures that + the total number of running daemons matches the specified count across + all available hosts. + +Colocation with Custom Ports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For more control over port assignments, you can specify custom ports for colocated daemons +using the ``colocation_ports`` parameter: + +.. 
code-block:: yaml + + service_type: nfs + service_id: mynfs + placement: + count: 4 + hosts: + - host1 + - host2 + spec: + port: 2049 + monitoring_port: 9587 + colocation_ports: + - data_port: 3049 + monitoring_port: 9588 + - data_port: 3050 + monitoring_port: 9589 + - data_port: 3051 + monitoring_port: 9590 + +In this configuration, 4 daemons total are deployed (2 per host), distributed across +``host1`` and ``host2``: + +* **host1, daemon 1**: ``port: 2049`` and ``monitoring_port: 9587`` +* **host1, daemon 2**: ``data_port: 3049`` and ``monitoring_port: 9588`` +* **host2, daemon 1**: ``port: 2049`` and ``monitoring_port: 9587`` +* **host2, daemon 2**: ``data_port: 3049`` and ``monitoring_port: 9588`` + +.. note:: + * The ``colocation_ports`` list defines ports for **additional** daemons only + (2nd, 3rd, 4th, etc.). The first daemon always uses the base ``port`` and + ``monitoring_port`` from the spec. + * The number of entries in ``colocation_ports`` should be at least ``count - 1``, + so that enough ports are defined when a host goes down and its daemons are redeployed on the remaining hosts (or at least ``count_per_host - 1`` when using ``count_per_host``). + * Each entry must specify both ``data_port`` and ``monitoring_port``. + * If ``colocation_ports`` is **not specified**, ports will be automatically + incremented for colocated daemons (e.g., 2049 → 2050 → 2051 for data ports, + and 9587 → 9588 → 9589 for monitoring ports). + +Per-Host Colocation +~~~~~~~~~~~~~~~~~~~ + +You can also use ``count_per_host`` to specify exactly how many daemons should +run on each host: + +.. code-block:: yaml + + service_type: nfs + service_id: mynfs + placement: + count_per_host: 3 + hosts: + - host1 + - host2 + - host3 + spec: + port: 2049 + monitoring_port: 9587 + colocation_ports: + - data_port: 3049 + monitoring_port: 9588 + - data_port: 4049 + monitoring_port: 9589 + +This will deploy exactly 3 NFS daemons on each of the 3 hosts (9 daemons total), +with custom ports for the 2nd and 3rd daemons on each host. 
+ TLS/SSL Example --------------- diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py index 86637d5aafc..94964b801e6 100644 --- a/src/pybind/mgr/cephadm/schedule.py +++ b/src/pybind/mgr/cephadm/schedule.py @@ -347,6 +347,24 @@ class HostAssignment(object): def expand_candidates(ls: List[DaemonPlacement], num: int) -> List[DaemonPlacement]: r = [] + # Check if spec has custom colocation ports (converted to list format) + if hasattr(self.spec, 'get_colocation_ports_list'): + custom_ports_list = self.spec.get_colocation_ports_list() + if custom_ports_list: + # First daemon (i=0) always uses base ports from spec + # Additional daemons (i=1,2,...) use colocation_ports if available + for i in range(num): + if i == 0: + r.extend([dp.renumber_ports(0) for dp in ls]) + elif i - 1 < len(custom_ports_list): + ports = custom_ports_list[i - 1] + r.extend([DaemonPlacement( + dp.daemon_type, dp.hostname, dp.network, dp.name, + dp.ip, ports, dp.rank, dp.rank_generation + ) for dp in ls]) + else: + r.extend([dp.renumber_ports(i) for dp in ls]) + return r for offset in range(num): r.extend([dp.renumber_ports(offset) for dp in ls]) return r diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py index d622e68e4b8..97a15294e4f 100644 --- a/src/pybind/mgr/cephadm/services/nfs.py +++ b/src/pybind/mgr/cephadm/services/nfs.py @@ -28,6 +28,9 @@ class NFSService(CephService): TYPE = 'nfs' DEFAULT_EXPORTER_PORT = 9587 + def allow_colo(self) -> bool: + return True + @property def needs_monitoring(self) -> bool: return True @@ -124,7 +127,7 @@ class NFSService(CephService): self.create_rados_config_obj(spec) port = daemon_spec.ports[0] if daemon_spec.ports else 2049 - monitoring_ip, monitoring_port = self.get_monitoring_details(daemon_spec.service_name, host) + monitoring_ip, monitoring_port = self.get_monitoring_details(daemon_spec.service_name, host, daemon_spec) # create the RGW keyring rgw_user = f'{rados_user}-rgw' @@ 
-421,9 +424,20 @@ class NFSService(CephService): cluster_ips.append(addrs[0]) return cluster_ips - def get_monitoring_details(self, service_name: str, host: str) -> Tuple[Optional[str], Optional[int]]: + def get_monitoring_details( + self, + service_name: str, + host: str, + daemon_spec: Optional['CephadmDaemonDeploySpec'] = None + ) -> Tuple[Optional[str], Optional[int]]: spec = cast(NFSServiceSpec, self.mgr.spec_store[service_name].spec) - monitoring_port = spec.monitoring_port if spec.monitoring_port else 9587 + + # For colocation, use the incremented monitoring port from daemon_spec.ports[1] if available + # Otherwise fall back to the spec's monitoring_port + if daemon_spec and daemon_spec.ports and len(daemon_spec.ports) > 1: + monitoring_port = daemon_spec.ports[1] + else: + monitoring_port = spec.monitoring_port if spec.monitoring_port else 9587 # check if monitor needs to be bind on specific ip monitoring_addr = spec.monitoring_ip_addrs.get(host) if spec.monitoring_ip_addrs else None diff --git a/src/pybind/mgr/cephadm/tests/test_scheduling.py b/src/pybind/mgr/cephadm/tests/test_scheduling.py index 056576efbf0..dce4f635646 100644 --- a/src/pybind/mgr/cephadm/tests/test_scheduling.py +++ b/src/pybind/mgr/cephadm/tests/test_scheduling.py @@ -10,6 +10,7 @@ from ceph.deployment.service_spec import ( ServiceSpec, PlacementSpec, IngressSpec, + NFSServiceSpec, PatternType, HostPattern, ) @@ -801,7 +802,7 @@ class NodeAssignmentTest(NamedTuple): ['nfs:host3(rank=1.4 *:2049,9587)'], ['nfs.1.2'] ), - # ranked, not enough hosts + # ranked, not enough hosts (with colocation, 4th daemon can be placed) NodeAssignmentTest( 'nfs', PlacementSpec(count=4), @@ -811,9 +812,9 @@ class NodeAssignmentTest(NamedTuple): DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), ], {0: {2: '0.2'}, 1: {2: '1.2'}}, - {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {0: None}}, - ['nfs:host1(rank=0.2 *:2049,9587)', 'nfs:host2(rank=1.2 *:2049,9587)', 'nfs:host3(rank=2.0 *:2049,9587)'], - 
['nfs:host3(rank=2.0 *:2049,9587)'], + {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {0: None}, 3: {0: None}}, + ['nfs:host1(rank=0.2 *:2049,9587)', 'nfs:host2(rank=1.2 *:2049,9587)', 'nfs:host3(rank=2.0 *:2049,9587)', 'nfs:host3(rank=3.0 *:2050,9588)'], + ['nfs:host3(rank=2.0 *:2049,9587)', 'nfs:host3(rank=3.0 *:2050,9588)'], [] ), # ranked, scale down @@ -832,7 +833,50 @@ class NodeAssignmentTest(NamedTuple): ['nfs:host2(rank=0.3 *:2049,9587)'], ['nfs.0.2', 'nfs.1.2', 'nfs.2.2'] ), - + # NFS colocation - count > hosts, ports should increment + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=4), + 'host1 host2'.split(), + [], + {}, + {0: {0: None}, 1: {0: None}, 2: {0: None}, 3: {0: None}}, + ['nfs:host2(rank=0.0 *:2049,9587)', 'nfs:host1(rank=1.0 *:2049,9587)', + 'nfs:host2(rank=2.0 *:2050,9588)', 'nfs:host1(rank=3.0 *:2050,9588)'], + ['nfs:host2(rank=0.0 *:2049,9587)', 'nfs:host1(rank=1.0 *:2049,9587)', + 'nfs:host2(rank=2.0 *:2050,9588)', 'nfs:host1(rank=3.0 *:2050,9588)'], + [] + ), + # NFS colocation with existing daemons + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=4), + 'host1 host2'.split(), + [ + DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1, ports=[2049, 9587]), + DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1, ports=[2049, 9587]), + ], + {0: {1: '0.1'}, 1: {1: '1.1'}}, + {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}, 3: {0: None}}, + ['nfs:host1(rank=0.1 *:2049,9587)', 'nfs:host2(rank=1.1 *:2049,9587)', + 'nfs:host2(rank=2.0 *:2050,9588)', 'nfs:host1(rank=3.0 *:2050,9588)'], + ['nfs:host2(rank=2.0 *:2050,9588)', 'nfs:host1(rank=3.0 *:2050,9588)'], + [] + ), + # NFS colocation with custom ports + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=4), + 'host1 host2'.split(), + [], + {}, + {0: {0: None}, 1: {0: None}, 2: {0: None}, 3: {0: None}}, + ['nfs:host2(rank=0.0 *:2049,9587)', 'nfs:host1(rank=1.0 *:2049,9587)', + 'nfs:host2(rank=2.0 *:3049,9588)', 'nfs:host1(rank=3.0 *:3049,9588)'], + ['nfs:host2(rank=0.0 
*:2049,9587)', 'nfs:host1(rank=1.0 *:2049,9587)', + 'nfs:host2(rank=2.0 *:3049,9588)', 'nfs:host1(rank=3.0 *:3049,9588)'], + [] + ), ]) def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post_rank_map, expected, expected_add, expected_remove): @@ -847,9 +891,27 @@ def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post allow_colo = True elif service_type == 'nfs': service_id = 'mynfs' - spec = ServiceSpec(service_type=service_type, - service_id=service_id, - placement=placement) + allow_colo = True + # Check if this is the custom ports test by looking at expected ports + if expected and any('3049' in str(e) for e in expected): + # Custom colocation ports test case + # First daemon uses base ports (port, monitoring_port) + # colocation_ports defines ADDITIONAL daemons only + spec = NFSServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement, + port=2049, + monitoring_port=9587, + colocation_ports=[ + {'data_port': 3049, 'monitoring_port': 9588}, + {'data_port': 3050, 'monitoring_port': 9589}, + {'data_port': 3051, 'monitoring_port': 9590}, + {'data_port': 3052, 'monitoring_port': 9591} + ]) + else: + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement) if not spec: spec = ServiceSpec(service_type=service_type, @@ -1266,6 +1328,50 @@ def test_bad_specs(service_type, placement, hosts, daemons, expected): assert str(e.value) == expected +def test_nfs_colocation_ports_validation(): + """Test validation of colocation_ports in NFSServiceSpec""" + from ceph.deployment.service_spec import SpecValidationError + # Valid case: correct number of colocation_ports (count=3, need 2 additional) + spec = NFSServiceSpec( + service_id='mynfs', + placement=PlacementSpec(count=3), + port=2049, + monitoring_port=9587, + colocation_ports=[ + {'data_port': 3049, 'monitoring_port': 9588}, + {'data_port': 4049, 'monitoring_port': 9589} + ] + ) + spec.validate() # Should not 
raise + + # Invalid case: too few colocation_ports (count=4, need 3 additional, but only 1 provided) + with pytest.raises(SpecValidationError) as e: + spec = NFSServiceSpec( + service_id='mynfs', + placement=PlacementSpec(count=4), + port=2049, + monitoring_port=9587, + colocation_ports=[{'data_port': 3049, 'monitoring_port': 9588}] + ) + spec.validate() + assert "colocation_ports requires 3 entries for count=4 (got 1)" in str(e.value) + + # Invalid case: missing required field + with pytest.raises(SpecValidationError) as e: + spec = NFSServiceSpec( + service_id='mynfs', + placement=PlacementSpec(count=3), + port=2049, + monitoring_port=9587, + colocation_ports=[ + {'data_port': 3049}, # Missing monitoring_port + {'data_port': 4049, 'monitoring_port': 9589} + ] + ) + spec.validate() + assert "missing required fields: monitoring_port" in str(e.value) + + class ActiveAssignmentTest(NamedTuple): service_type: str placement: PlacementSpec diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index fba421cdfe1..6999bbe739a 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -1317,6 +1317,8 @@ yaml.add_representer(ServiceSpec, ServiceSpec.yaml_representer) class NFSServiceSpec(ServiceSpec): + COLOCATION_PORT_FIELDS = ['data_port', 'monitoring_port'] + def __init__(self, service_type: str = 'nfs', service_id: Optional[str] = None, @@ -1347,6 +1349,7 @@ class NFSServiceSpec(ServiceSpec): tls_debug: bool = False, tls_min_version: Optional[str] = None, tls_ciphers: Optional[str] = None, + colocation_ports: Optional[List[Dict[str, int]]] = None, ): assert service_type == 'nfs' super(NFSServiceSpec, self).__init__( @@ -1372,6 +1375,11 @@ class NFSServiceSpec(ServiceSpec): self.idmap_conf = idmap_conf self.enable_nlm = enable_nlm + # colocation_ports is a list of port dicts for ADDITIONAL colocated daemons + # The first daemon always uses port and 
monitoring_port from the spec + # Format: [{'data_port': 1234, 'monitoring_port': 5678}, ...] + self.colocation_ports = colocation_ports + # TLS fields self.tls_ciphers = tls_ciphers self.tls_ktls = tls_ktls @@ -1381,10 +1389,59 @@ class NFSServiceSpec(ServiceSpec): def get_port_start(self) -> List[int]: return [self.port or 2049, self.monitoring_port or 9587] + def get_colocation_ports_list(self) -> List[List[int]]: + """ + Convert the colocation_ports dictionary into a list of port lists + so the scheduler can handle port assignment in a generic way + """ + if not self.colocation_ports: + return [] + return [[port_dict[field] for field in self.COLOCATION_PORT_FIELDS] + for port_dict in self.colocation_ports] + def rados_config_name(self): # type: () -> str return 'conf-' + self.service_name() + def validate_colocation_ports(self) -> None: + """Validate colocation_ports configuration.""" + if not self.colocation_ports: + return + # Validate entry count matches placement requirements + if self.placement: + actual = len(self.colocation_ports) + if self.placement.count_per_host: + expected = self.placement.count_per_host - 1 + if actual < expected: + raise SpecValidationError( + f"colocation_ports requires {expected} entries for " + f"count_per_host={self.placement.count_per_host} (got {actual}). First " + "daemon uses base ports, remaining need custom ports." + ) + elif self.placement.count: + expected = self.placement.count - 1 + if actual < expected: + raise SpecValidationError( + f"colocation_ports requires {expected} entries for " + f"count={self.placement.count} (got {actual}). First daemon uses base " + "ports, remaining need custom ports." 
+ ) + # Validate that each entry has the required port fields + for idx, port_dict in enumerate(self.colocation_ports): + if not isinstance(port_dict, dict): + raise SpecValidationError( + f"colocation_ports[{idx}] must be a dict with " + f"fields: {', '.join(self.COLOCATION_PORT_FIELDS)}" + ) + missing = [f for f in self.COLOCATION_PORT_FIELDS if f not in port_dict] + if missing: + missing_str = ', '.join(missing) + format_str = ', '.join(f'{f!r}: ' for f in self.COLOCATION_PORT_FIELDS) + raise SpecValidationError( + f"Invalid NFS spec: colocation_ports[{idx}] missing required " + f"fields: {missing_str}. Expected format: {{{format_str}}}" + ) + def validate(self) -> None: super(NFSServiceSpec, self).validate() @@ -1392,6 +1449,9 @@ class NFSServiceSpec(ServiceSpec): raise SpecValidationError("Invalid NFS spec: Cannot set virtual_ip and " f"{'ip_addrs' if self.ip_addrs else 'networks'} fields") + # Validate colocation_ports + self.validate_colocation_ports() + # TLS certificate validation if self.ssl and not self.certificate_source: raise SpecValidationError('If SSL is enabled, a certificate source must be provided.')