git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: Fixed Keepalived behavior for failover 66381/head
authorShweta Bhosale <Shweta.Bhosale1@ibm.com>
Mon, 24 Nov 2025 08:04:06 +0000 (13:34 +0530)
committerShweta Bhosale <Shweta.Bhosale1@ibm.com>
Mon, 19 Jan 2026 14:37:25 +0000 (20:07 +0530)
1. Keepalived will fail over to another node when the daemon's node is down
2. While removing extra daemons of an Ingress service, make sure to remove them first from nodes that do not host dependent daemons

Fixes: https://tracker.ceph.com/issues/73949
Signed-off-by: Shweta Bhosale <Shweta.Bhosale1@ibm.com>
src/pybind/mgr/cephadm/schedule.py
src/pybind/mgr/cephadm/tests/test_scheduling.py
src/pybind/mgr/cephadm/utils.py

index fe3cc093efd7a12c0d3ab9ece5cbcc0b40e05f46..166a8326168ae7fff20cf854db216b33c6622ea6 100644 (file)
@@ -171,7 +171,7 @@ class DaemonPlacement(NamedTuple):
             if self.ports:
                 if self.ports != dd.ports and dd.ports:
                     return False
-                if self.ip != dd.ip and dd.ip:
+                if self.ip and dd.ip and self.ip != dd.ip:
                     return False
         return True
 
@@ -436,10 +436,36 @@ class HostAssignment(object):
             # The number of new slots that need to be selected in order to fulfill count
             need = count - len(existing)
 
-            # we don't need any additional placements
-            if need <= 0:
-                to_remove.extend(existing[count:])
-                del existing_slots[count:]
+            # Scaling down: more daemons exist than required.
+            # When related services exist, prioritize keeping daemons co-located with them
+            # by removing from non-related hosts first, then from related hosts if needed.
+            if need < 0:
+                non_matching_daemons = []
+                if self.related_service_daemons:
+                    # Get unique hostnames where related service daemons are running
+                    related_service_hosts = list(set(dd.hostname for dd in self.related_service_daemons))
+
+                    total_excess = len(existing) - count
+                    to_delete = []
+                    # First, prefer removing daemons from hosts that don't have related services
+                    non_related = [dd for dd in existing if dd.hostname not in related_service_hosts]
+                    to_delete.extend(non_related[-total_excess:])
+
+                    # If we still need to remove more, remove from hosts with related services
+                    remaining_needed = total_excess - len(to_delete)
+                    if remaining_needed > 0:
+                        remaining = [dd for dd in existing if dd not in to_delete]
+                        to_delete.extend(remaining[count:])
+
+                    non_matching_daemons = to_delete
+                else:
+                    # No related services - simply remove excess daemons beyond target count
+                    non_matching_daemons = existing[count:]
+
+                to_remove.extend(non_matching_daemons)
+                # remove from  existing_slots
+                non_matching_hostnames = {dd.hostname for dd in non_matching_daemons}
+                existing_slots = [slot for slot in existing_slots if slot.hostname not in non_matching_hostnames]
                 return self.place_per_host_daemons(existing_slots, [], to_remove)
 
             if self.related_service_daemons:
index 926afa6ba372217a81545b1e448d81170ea6e5b1..bb8cb572b6214657cb2fafa0a23df942c8f9a9c8 100644 (file)
@@ -1857,3 +1857,166 @@ def test_blocking_daemon_host(
     ).place()
     assert sorted([h.hostname for h in to_add]) in expected_add
     assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+@pytest.mark.parametrize(
+    "service_type,placement,hosts,daemons,related_service_daemons,rank_map,expected,expected_add,expected_remove",
+    [
+        (
+            'ingress',
+            PlacementSpec(count=2),
+            'host1 host2 host3 host4'.split(),
+            [
+                DaemonDescription('haproxy', 'ingress1', 'host1'),
+                DaemonDescription('keepalived', 'ingress1', 'host1'),
+                DaemonDescription('haproxy', 'ingress2', 'host2'),
+                DaemonDescription('keepalived', 'ingress2', 'host2'),
+                DaemonDescription('haproxy', 'ingress3', 'host3'),
+                DaemonDescription('keepalived', 'ingress3', 'host3'),
+                DaemonDescription('haproxy', 'ingress4', 'host4'),
+                DaemonDescription('keepalived', 'ingress4', 'host4'),
+            ],
+            [
+                DaemonDescription('nfs', 'nfs1', 'host1'),
+                DaemonDescription('nfs', 'nfs3', 'host3'),
+            ],
+            None,
+            ['haproxy:host1(*:443,8888)', 'haproxy:host3(*:443,8888)', 'keepalived:host1', 'keepalived:host3'],
+            [],
+            ['haproxy.ingress2', 'haproxy.ingress4', 'keepalived.ingress2', 'keepalived.ingress4']  # to_remove
+        ),
+        (
+            'ingress',
+            PlacementSpec(count=1),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('haproxy', 'ingress1', 'host1'),
+                DaemonDescription('keepalived', 'ingress1', 'host1'),
+                DaemonDescription('haproxy', 'ingress2', 'host2'),
+                DaemonDescription('keepalived', 'ingress2', 'host2'),
+                DaemonDescription('haproxy', 'ingress3', 'host3'),
+                DaemonDescription('keepalived', 'ingress3', 'host3'),
+            ],
+            [
+                DaemonDescription('nfs', 'nfs1', 'host1'),
+                DaemonDescription('nfs', 'nfs2', 'host2'),
+            ],
+            None,
+            ['haproxy:host1(*:443,8888)', 'keepalived:host1'],
+            [],
+            ['haproxy.ingress2', 'haproxy.ingress3', 'keepalived.ingress2', 'keepalived.ingress3']  # to_remove
+        ),
+        (
+            'nfs',
+            PlacementSpec(count=1),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('nfs', 'nfs1', 'host1'),
+                DaemonDescription('nfs', 'nfs2', 'host2'),
+            ],
+            [],
+            None,
+            ['nfs:host1'],
+            [],
+            ['nfs.nfs2']  # to_remove
+        ),
+        (
+            'ingress',
+            PlacementSpec(count=2),
+            'host1 host2 host3 host4 host5 host6'.split(),
+            [
+                DaemonDescription('haproxy', 'ingress1', 'host1'),
+                DaemonDescription('keepalived', 'ingress1', 'host1'),
+                DaemonDescription('haproxy', 'ingress2', 'host2'),
+                DaemonDescription('keepalived', 'ingress2', 'host2'),
+                DaemonDescription('haproxy', 'ingress3', 'host3'),
+                DaemonDescription('keepalived', 'ingress3', 'host3'),
+                DaemonDescription('haproxy', 'ingress4', 'host4'),
+                DaemonDescription('keepalived', 'ingress4', 'host4'),
+            ],
+            [
+                DaemonDescription('nfs', 'nfs5', 'host5'),
+                DaemonDescription('nfs', 'nfs6', 'host6'),
+            ],
+            None,
+            ['haproxy:host1(*:443,8888)', 'haproxy:host2(*:443,8888)', 'keepalived:host1', 'keepalived:host2'],
+            [],
+            ['haproxy.ingress3', 'haproxy.ingress4', 'keepalived.ingress3', 'keepalived.ingress4']  # to_remove
+        ),
+        (
+            'nfs',
+            PlacementSpec(count=2),
+            'host1 host2 host3 host4'.split(),
+            [
+                DaemonDescription('nfs', 'nfs1', 'host1'),
+                DaemonDescription('nfs', 'nfs2', 'host2'),
+                DaemonDescription('nfs', 'nfs3', 'host3'),
+                DaemonDescription('nfs', 'nfs4', 'host4'),
+            ],
+            [
+                DaemonDescription('haproxy', 'ingress3', 'host3'),
+                DaemonDescription('keepalived', 'ingress3', 'host3'),
+                DaemonDescription('haproxy', 'ingress4', 'host4'),
+                DaemonDescription('keepalived', 'ingress4', 'host4'),
+            ],
+            None,
+            ['nfs:host3', 'nfs:host4'],
+            [],
+            ['nfs.nfs1', 'nfs.nfs2']  # to_remove
+        ),
+        (
+            'nfs',
+            PlacementSpec(count=2),
+            'host1 host2 host3'.split(),
+            [
+                DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2),
+                DaemonDescription('nfs', '0.1', 'host2', rank=0, rank_generation=1),
+                DaemonDescription('nfs', '1.1', 'host3', rank=1, rank_generation=1),
+            ],
+            [
+                DaemonDescription('haproxy', 'ingress2', 'host2'),
+                DaemonDescription('keepalived', 'ingress2', 'host2'),
+                DaemonDescription('haproxy', 'ingress3', 'host3'),
+                DaemonDescription('keepalived', 'ingress3', 'host3'),
+            ],
+            {0: {1: '0.1', 2: '0.2'}, 1: {1: '1.1'}},
+            ['nfs:host1(rank=0.2)', 'nfs:host3(rank=1.1)'],
+            [],
+            ['nfs.0.1']
+        ),
+    ])
+def test_related_service_downsize(service_type, placement, hosts, daemons, related_service_daemons, rank_map, expected, expected_add, expected_remove):
+    if service_type == 'ingress':
+        spec = IngressSpec(
+            service_type='ingress',
+            service_id='nfs.test',
+            frontend_port=443,
+            monitor_port=8888,
+            virtual_ip='10.0.0.20/8',
+            backend_service='nfs.test',
+            placement=placement,
+        )
+    else:
+        spec = ServiceSpec(service_type=service_type,
+                           service_id='test',
+                           placement=placement)
+
+    all_slots, to_add, to_remove = HostAssignment(
+        spec=spec,
+        hosts=[HostSpec(h) for h in hosts],
+        unreachable_hosts=[],
+        draining_hosts=[],
+        daemons=daemons,
+        related_service_daemons=related_service_daemons,
+        primary_daemon_type='haproxy' if spec.service_type == 'ingress' else spec.service_type,
+        per_host_daemon_type='keepalived' if spec.service_type == 'ingress' else None,
+        rank_map=rank_map
+    ).place()
+
+    got = [str(p) for p in all_slots]
+    assert sorted(got) == sorted(expected)
+
+    got_add = [str(p) for p in to_add]
+    assert sorted(got_add) == sorted(expected_add)
+
+    assert sorted([d.name() for d in to_remove]) == sorted(expected_remove)
index 9ef394e86dc3f644100a4d86fb4cb14fc83a2aa6..cb17faf2a306635ca910a17c9fe3daffc66c472a 100644 (file)
@@ -28,7 +28,7 @@ GATEWAY_TYPES = ['iscsi', 'nfs', 'nvmeof', 'smb']
 MONITORING_STACK_TYPES = ['node-exporter', 'prometheus',
                           'alertmanager', 'grafana', 'loki', 'promtail', 'alloy']
 MGMT_GATEWAY_STACK_TYPES = ['mgmt-gateway', 'oauth2-proxy']
-RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES = ['haproxy', 'nfs']
+RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES = ['haproxy', 'nfs', 'keepalived']
 
 CEPH_UPGRADE_ORDER = CEPH_TYPES + GATEWAY_TYPES + MONITORING_STACK_TYPES + MGMT_GATEWAY_STACK_TYPES