git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: fixing scheduler consistent hashing
author: Redouane Kachach <rkachach@redhat.com>
Tue, 28 Jun 2022 13:32:19 +0000 (15:32 +0200)
committer: Redouane Kachach <rkachach@redhat.com>
Wed, 29 Jun 2022 09:50:42 +0000 (11:50 +0200)
Signed-off-by: Redouane Kachach <rkachach@redhat.com>
src/pybind/mgr/cephadm/schedule.py
src/pybind/mgr/cephadm/tests/test_scheduling.py

index 612c558043c8f59e600543c0a418f5c77915f547..5002ec6e5060e66f71c378f8e574bb8639384ec1 100644 (file)
@@ -353,14 +353,7 @@ class HostAssignment(object):
             for i in range(len(to_add)):
                 to_add[i] = to_add[i].assign_rank_generation(ranks[i], self.rank_map)
 
-        # If we don't have <count> the list of candidates is definitive.
-        if count is None:
-            final = existing_slots + to_add
-            logger.debug('Provided hosts: %s' % final)
-            return self.place_per_host_daemons(final, to_add, to_remove)
-
-        logger.debug('Combine hosts with existing daemons %s + new hosts %s' % (
-            existing, to_add))
+        logger.debug('Combine hosts with existing daemons %s + new hosts %s' % (existing, to_add))
         return self.place_per_host_daemons(existing_slots + to_add, to_add, to_remove)
 
     def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]:
@@ -429,15 +422,17 @@ class HostAssignment(object):
             if len(old) > len(ls):
                 logger.debug('Filtered %s down to %s' % (old, ls))
 
-        # shuffle for pseudo random selection
-        # gen seed off of self.spec to make shuffling deterministic
+        # Now that we have the list of candidate nodes based on the configured
+        # placement, shuffle the list for pseudo-random node selection. To do this,
+        # we generate a seed from the service name and use it to shuffle the candidates.
+        # This makes the shuffling deterministic for the same service name.
         seed = int(
             hashlib.sha1(self.spec.service_name().encode('utf-8')).hexdigest(),
             16
-        ) % (2 ** 32)
+        ) % (2 ** 32)  # truncate result to 32 bits
         final = sorted(ls)
         random.Random(seed).shuffle(final)
-        return ls
+        return final
 
     def remove_non_maintenance_unreachable_candidates(self, candidates: List[DaemonPlacement]) -> List[DaemonPlacement]:
         in_maintenance: Dict[str, bool] = {}
index c70ef9fb5ee1106ebffde9b37aa4a5a00cd91ad9..52ca820450b114245ff9a6a0ac44a42e7c3ab2c9 100644 (file)
@@ -157,39 +157,6 @@ def run_scheduler_test(results, mk_spec, hosts, daemons, key_elems):
             assert_res(e)
 
 
-# * first match from the top wins
-# * where e=[], *=any
-#
-#       + list of known hosts available for scheduling (host_key)
-#       |   + hosts used for explict placement (explicit_key)
-#       |   |   + count
-#       |   |   | + section (host, label, pattern)
-#       |   |   | |     + expected result
-#       |   |   | |     |
-test_explicit_scheduler_results = [
-    (k("*   *   0 *"), error(SpecValidationError, 'num/count must be >= 1')),
-    (k("*   e   N l"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label mylabel')),
-    (k("*   e   N p"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts')),
-    (k("*   e   N h"), error(OrchestratorValidationError, 'placement spec is empty: no hosts, no label, no pattern, no count')),
-    (k("*   e   * *"), none),
-    (k("1   12  * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2: Unknown hosts")),
-    (k("1   123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts")),
-    (k("1   *   * *"), exactly('1')),
-    (k("12  1   * *"), exactly('1')),
-    (k("12  12  1 *"), one_of('1', '2')),
-    (k("12  12  * *"), exactly('1', '2')),
-    (k("12  123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts")),
-    (k("12  123 1 *"), one_of('1', '2', '3')),
-    (k("12  123 * *"), two_of('1', '2', '3')),
-    (k("123 1   * *"), exactly('1')),
-    (k("123 12  1 *"), one_of('1', '2')),
-    (k("123 12  * *"), exactly('1', '2')),
-    (k("123 123 1 *"), one_of('1', '2', '3')),
-    (k("123 123 2 *"), two_of('1', '2', '3')),
-    (k("123 123 * *"), exactly('1', '2', '3')),
-]
-
-
 @pytest.mark.parametrize("dp,n,result",
     [   # noqa: E128
         (
@@ -240,6 +207,39 @@ def test_daemon_placement_match(dp, dd, result):
     assert dp.matches_daemon(dd) == result
 
 
+# * first match from the top wins
+# * where e=[], *=any
+#
+#       + list of known hosts available for scheduling (host_key)
+#       |   + hosts used for explicit placement (explicit_key)
+#       |   |   + count
+#       |   |   | + section (host, label, pattern)
+#       |   |   | |     + expected result
+#       |   |   | |     |
+test_explicit_scheduler_results = [
+    (k("*   *   0 *"), error(SpecValidationError, 'num/count must be >= 1')),
+    (k("*   e   N l"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label mylabel')),
+    (k("*   e   N p"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts')),
+    (k("*   e   N h"), error(OrchestratorValidationError, 'placement spec is empty: no hosts, no label, no pattern, no count')),
+    (k("*   e   * *"), none),
+    (k("1   12  * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2: Unknown hosts")),
+    (k("1   123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts")),
+    (k("1   *   * *"), exactly('1')),
+    (k("12  1   * *"), exactly('1')),
+    (k("12  12  1 *"), one_of('1', '2')),
+    (k("12  12  * *"), exactly('1', '2')),
+    (k("12  123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts")),
+    (k("12  123 1 *"), one_of('1', '2', '3')),
+    (k("12  123 * *"), two_of('1', '2', '3')),
+    (k("123 1   * *"), exactly('1')),
+    (k("123 12  1 *"), one_of('1', '2')),
+    (k("123 12  * *"), exactly('1', '2')),
+    (k("123 123 1 *"), one_of('1', '2', '3')),
+    (k("123 123 2 *"), two_of('1', '2', '3')),
+    (k("123 123 * *"), exactly('1', '2', '3')),
+]
+
+
 @pytest.mark.parametrize("spec_section_key,spec_section",
     [   # noqa: E128
         ('h', 'hosts'),
@@ -655,8 +655,8 @@ class NodeAssignmentTest(NamedTuple):
             [],
             {},
             {0: {0: None}, 1: {0: None}, 2: {0: None}},
-            ['nfs:host1(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host3(rank=2.0)'],
-            ['nfs:host1(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host3(rank=2.0)'],
+            ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'],
+            ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'],
             []
         ),
         # 21: ranked, exist
@@ -669,8 +669,8 @@ class NodeAssignmentTest(NamedTuple):
             ],
             {0: {1: '0.1'}},
             {0: {1: '0.1'}, 1: {0: None}, 2: {0: None}},
-            ['nfs:host1(rank=0.1)', 'nfs:host2(rank=1.0)', 'nfs:host3(rank=2.0)'],
-            ['nfs:host2(rank=1.0)', 'nfs:host3(rank=2.0)'],
+            ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'],
+            ['nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'],
             []
         ),
         # ranked, exist, different ranks
@@ -778,8 +778,8 @@ class NodeAssignmentTest(NamedTuple):
             ],
             {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3'}},
             {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3', 4: None}},
-            ['nfs:host1(rank=0.2)', 'nfs:host2(rank=1.4)'],
-            ['nfs:host2(rank=1.4)'],
+            ['nfs:host1(rank=0.2)', 'nfs:host3(rank=1.4)'],
+            ['nfs:host3(rank=1.4)'],
             ['nfs.1.2']
         ),
         # ranked, not enough hosts
@@ -871,6 +871,78 @@ def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post
     assert sorted([d.name() for d in to_remove]) == sorted(expected_remove)
 
 
+class NodeAssignmentTest5(NamedTuple):
+    service_type: str
+    placement: PlacementSpec
+    available_hosts: List[str]
+    candidates_hosts: List[str]
+
+
+@pytest.mark.parametrize("service_type, placement, available_hosts, expected_candidates",
+    [
+        NodeAssignmentTest5(
+            'alertmanager',
+            PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+            'host1 host2 host3 host4'.split(),
+            'host3 host1 host4 host2'.split(),
+        ),
+        NodeAssignmentTest5(
+            'prometheus',
+            PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+            'host1 host2 host3 host4'.split(),
+            'host3 host2 host4 host1'.split(),
+        ),
+        NodeAssignmentTest5(
+            'grafana',
+            PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+            'host1 host2 host3 host4'.split(),
+            'host1 host2 host4 host3'.split(),
+        ),
+        NodeAssignmentTest5(
+            'mgr',
+            PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+            'host1 host2 host3 host4'.split(),
+            'host4 host2 host1 host3'.split(),
+        ),
+        NodeAssignmentTest5(
+            'mon',
+            PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+            'host1 host2 host3 host4'.split(),
+            'host1 host3 host4 host2'.split(),
+        ),
+        NodeAssignmentTest5(
+            'rgw',
+            PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+            'host1 host2 host3 host4'.split(),
+            'host1 host3 host2 host4'.split(),
+        ),
+        NodeAssignmentTest5(
+            'cephfs-mirror',
+            PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+            'host1 host2 host3 host4'.split(),
+            'host4 host3 host1 host2'.split(),
+        ),
+    ])
+def test_node_assignment_random_shuffle(service_type, placement, available_hosts, expected_candidates):
+    spec = None
+    service_id = None
+    allow_colo = False
+    spec = ServiceSpec(service_type=service_type,
+                       service_id=service_id,
+                       placement=placement)
+
+    candidates = HostAssignment(
+        spec=spec,
+        hosts=[HostSpec(h, labels=['foo']) for h in available_hosts],
+        unreachable_hosts=[],
+        daemons=[],
+        allow_colo=allow_colo,
+    ).get_candidates()
+
+    candidates_hosts = [h.hostname for h in candidates]
+    assert candidates_hosts == expected_candidates
+
+
 class NodeAssignmentTest2(NamedTuple):
     service_type: str
     placement: PlacementSpec