From 63d13df4eb469fb6f5d85ee06184e7df670aa193 Mon Sep 17 00:00:00 2001
From: Adam King
Date: Mon, 16 May 2022 20:44:11 -0400
Subject: [PATCH] mgr/cephadm: force fail over when we want to remove active mgr

Fixes: https://tracker.ceph.com/issues/55679

Signed-off-by: Adam King
---
 src/pybind/mgr/cephadm/serve.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
index b6b4211e78a77..8568cc12b3029 100644
--- a/src/pybind/mgr/cephadm/serve.py
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -799,6 +799,14 @@ class CephadmServe:
                 self.mgr.set_health_warning('CEPHADM_DAEMON_PLACE_FAIL', f'Failed to place {len(daemon_place_fails)} daemon(s)', len(
                     daemon_place_fails), daemon_place_fails)
 
+            if service_type == 'mgr':
+                active_mgr = svc.get_active_daemon(self.mgr.cache.get_daemons_by_type('mgr'))
+                if active_mgr.daemon_id in [d.daemon_id for d in daemons_to_remove]:
+                    # We can't just remove the active mgr like any other daemon.
+                    # Need to fail over later so it can be removed on next pass.
+                    # This can be accomplished by scheduling a restart of the active mgr.
+                    self.mgr._schedule_daemon_action(active_mgr.name(), 'restart')
+
             # remove any?
             def _ok_to_stop(remove_daemons: List[orchestrator.DaemonDescription]) -> bool:
                 daemon_ids = [d.daemon_id for d in remove_daemons]
-- 
2.39.5