git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm/schedule: choose an IP from a subnet list
author: Sage Weil <sage@newdream.net>
Thu, 11 Mar 2021 23:47:24 +0000 (18:47 -0500)
committer: Sage Weil <sage@newdream.net>
Tue, 16 Mar 2021 23:20:34 +0000 (19:20 -0400)
Choose an IP from the subnet list provided by the ServiceSpec.

A few caveats:
- we ignore hosts that don't have IPs in the given subnet
- the subnet matching is STRICT.  That is, the CIDR name has to exactly
match what is configured on the host.  That means you can't just say 10/8
to match any 10.whatever address--you need the exact network on the host
(e.g., 10.1.2.0/24).
- If you modify a servicespec and change the networks when there are
already deployed daemons, we will try to deploy the new instances on
the same ports but bound to a specific IP instead of *, which will fail.
You need to remove the service first, or remove the old daemons manually
so that creating new ones will succeed.

Signed-off-by: Sage Weil <sage@newdream.net>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/schedule.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/cephadmservice.py
src/pybind/mgr/cephadm/tests/test_scheduling.py

index b712f3a388a55b04a628afae69a1a3cd8e416dba..a6818a17958b3e800a014f616fc9cd3158375284 100644 (file)
@@ -2082,6 +2082,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
         ha = HostAssignment(
             spec=spec,
             hosts=self._hosts_with_daemon_inventory(),
+            networks=self.cache.networks,
             daemons=self.cache.get_daemons_by_service(spec.service_name()),
             allow_colo=self.cephadm_services[spec.service_type].allow_colo(),
         )
@@ -2138,6 +2139,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
         HostAssignment(
             spec=spec,
             hosts=self.inventory.all_specs(),  # All hosts, even those without daemon refresh
+            networks=self.cache.networks,
             daemons=self.cache.get_daemons_by_service(spec.service_name()),
             allow_colo=self.cephadm_services[spec.service_type].allow_colo(),
         ).validate()
index 98e3798a02aa8fdba99062b63de02ceacf02241c..914f178f3cad020ca09a634f5c9034131267ab11 100644 (file)
@@ -1,6 +1,6 @@
 import logging
 import random
-from typing import List, Optional, Callable, TypeVar, Tuple, NamedTuple
+from typing import List, Optional, Callable, TypeVar, Tuple, NamedTuple, Dict
 
 import orchestrator
 from ceph.deployment.service_spec import ServiceSpec
@@ -62,6 +62,7 @@ class HostAssignment(object):
                  spec,  # type: ServiceSpec
                  hosts: List[orchestrator.HostSpec],
                  daemons: List[orchestrator.DaemonDescription],
+                 networks: Dict[str, Dict[str, List[str]]] = {},
                  filter_new_host=None,  # type: Optional[Callable[[str],bool]]
                  allow_colo: bool = False,
                  ):
@@ -71,6 +72,7 @@ class HostAssignment(object):
         self.filter_new_host = filter_new_host
         self.service_name = spec.service_name()
         self.daemons = daemons
+        self.networks = networks
         self.allow_colo = allow_colo
         self.port_start = spec.get_port_start()
 
@@ -202,6 +204,13 @@ class HostAssignment(object):
             existing, to_add))
         return existing_slots + to_add, to_add, to_remove
 
+    def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]:
+        for subnet in subnets:
+            ips = self.networks.get(hostname, {}).get(subnet, [])
+            if ips:
+                return sorted(ips)[0]
+        return None
+
     def get_candidates(self) -> List[DaemonPlacement]:
         if self.spec.placement.hosts:
             ls = [
@@ -231,6 +240,20 @@ class HostAssignment(object):
             raise OrchestratorValidationError(
                 "placement spec is empty: no hosts, no label, no pattern, no count")
 
+        # allocate an IP?
+        if self.spec.networks:
+            orig = ls.copy()
+            ls = []
+            for p in orig:
+                ip = self.find_ip_on_host(p.hostname, self.spec.networks)
+                if ip:
+                    ls.append(DaemonPlacement(hostname=p.hostname, network=p.network,
+                                              name=p.name, port=p.port, ip=ip))
+                else:
+                    logger.debug(
+                        f'Skipping {p.hostname} with no IP in network(s) {self.spec.networks}'
+                    )
+
         if self.filter_new_host:
             old = ls.copy()
             ls = [h for h in ls if self.filter_new_host(h.hostname)]
index 3cf9435e91a2ebc6c646d25acb335fe0b9d2ed4a..4543d872b9965b13f6d3ac897e5b5700d34a4066 100644 (file)
@@ -554,6 +554,7 @@ class CephadmServe:
             spec=spec,
             hosts=self.mgr._hosts_with_daemon_inventory(),
             daemons=daemons,
+            networks=self.mgr.cache.networks,
             filter_new_host=matches_network if service_type == 'mon'
             else virtual_ip_allowed if service_type == 'ha-rgw' else None,
             allow_colo=svc.allow_colo(),
@@ -600,7 +601,8 @@ class CephadmServe:
 
                 daemon_spec = svc.make_daemon_spec(
                     slot.hostname, daemon_id, slot.network, spec, daemon_type=daemon_type,
-                    ports=[slot.port] if slot.port else None
+                    ports=[slot.port] if slot.port else None,
+                    ip=slot.ip,
                 )
                 self.log.debug('Placing %s.%s on host %s' % (
                     daemon_type, daemon_id, slot.hostname))
index ea48c5321f7586f1795e231fb82d3138e6455141..227417e869a2055c16bb5a658eab13d3bc712190 100644 (file)
@@ -124,7 +124,8 @@ class CephadmService(metaclass=ABCMeta):
             network: str,
             spec: ServiceSpecs,
             daemon_type: Optional[str] = None,
-            ports: Optional[List[int]] = None
+            ports: Optional[List[int]] = None,
+            ip: Optional[str] = None,
     ) -> CephadmDaemonDeploySpec:
         return CephadmDaemonDeploySpec(
             host=host,
@@ -133,6 +134,7 @@ class CephadmService(metaclass=ABCMeta):
             network=network,
             daemon_type=daemon_type,
             ports=ports,
+            ip=ip,
         )
 
     def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
index 408415e92be02d62362485e6398ce8405567fa32..f149066fc15d142309999518a0f23c388b5a494c 100644 (file)
@@ -2,7 +2,7 @@
 
 # fmt: off
 
-from typing import NamedTuple, List
+from typing import NamedTuple, List, Dict
 import pytest
 
 from ceph.deployment.hostspec import HostSpec
@@ -729,6 +729,72 @@ def test_node_assignment3(service_type, placement, hosts,
         assert h in [h.hostname for h in hosts]
 
 
+class NodeAssignmentTest4(NamedTuple):
+    spec: ServiceSpec
+    networks: Dict[str, Dict[str, List[str]]]
+    daemons: List[DaemonDescription]
+    expected: List[str]
+    expected_add: List[str]
+    expected_remove: List[DaemonDescription]
+
+
+@pytest.mark.parametrize("spec,networks,daemons,expected,expected_add,expected_remove",
+    [   # noqa: E128
+        NodeAssignmentTest4(
+            ServiceSpec(
+                service_type='rgw',
+                service_id='foo',
+                placement=PlacementSpec(count=6, label='foo'),
+                networks=['10.0.0.0/8'],
+            ),
+            {
+                'host1': {'10.0.0.0/8': ['10.0.0.1']},
+                'host2': {'10.0.0.0/8': ['10.0.0.2']},
+                'host3': {'192.168.0.0/16': ['192.168.0.1']},
+            },
+            [],
+            ['host1(ip=10.0.0.1 port=80)', 'host2(ip=10.0.0.2 port=80)',
+             'host1(ip=10.0.0.1 port=81)', 'host2(ip=10.0.0.2 port=81)',
+             'host1(ip=10.0.0.1 port=82)', 'host2(ip=10.0.0.2 port=82)'],
+            ['host1(ip=10.0.0.1 port=80)', 'host2(ip=10.0.0.2 port=80)',
+             'host1(ip=10.0.0.1 port=81)', 'host2(ip=10.0.0.2 port=81)',
+             'host1(ip=10.0.0.1 port=82)', 'host2(ip=10.0.0.2 port=82)'],
+            []
+        ),
+    ])
+def test_node_assignment4(spec, networks, daemons,
+                          expected, expected_add, expected_remove):
+    all_slots, to_add, to_remove = HostAssignment(
+        spec=spec,
+        hosts=[HostSpec(h, labels=['foo']) for h in networks.keys()],
+        daemons=daemons,
+        allow_colo=True,
+        networks=networks,
+    ).place()
+
+    got = [str(p) for p in all_slots]
+    num_wildcard = 0
+    for i in expected:
+        if i == '*':
+            num_wildcard += 1
+        else:
+            assert i in got
+            got.remove(i)
+    assert num_wildcard == len(got)
+
+    got = [str(p) for p in to_add]
+    num_wildcard = 0
+    for i in expected_add:
+        if i == '*':
+            num_wildcard += 1
+        else:
+            assert i in got
+            got.remove(i)
+    assert num_wildcard == len(got)
+
+    assert sorted([d.name() for d in to_remove]) == sorted(expected_remove)
+
+
 @pytest.mark.parametrize("placement",
     [   # noqa: E128
         ('1 *'),