git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: don't remove daemons from hosts in maintenance or offline mode
author: Adam King <adking@redhat.com>
Wed, 14 Apr 2021 19:29:58 +0000 (15:29 -0400)
committer: Sage Weil <sage@newdream.net>
Tue, 4 May 2021 16:19:21 +0000 (11:19 -0500)
Fixes: https://tracker.ceph.com/issues/50364
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit eebb842d0487660c93baf9eafda28a2f87e482f3)

src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/tests/test_cephadm.py

index f17219730c421cdd5dc873f033c1948a08261129..ea2d8a296102d5156fdef0b17299b61a8530ddf0 100644 (file)
@@ -560,6 +560,8 @@ class CephadmServe:
 
         try:
             all_slots, slots_to_add, daemons_to_remove = ha.place()
+            daemons_to_remove = [d for d in daemons_to_remove if (d.hostname and self.mgr.inventory._inventory[d.hostname].get(
+                'status', '').lower() not in ['maintenance', 'offline'])]
             self.log.debug('Add %s, remove %s' % (slots_to_add, daemons_to_remove))
         except OrchestratorError as e:
             self.log.error('Failed to apply %s spec %s: %s' % (
index 8c1949e74db69aa3688d8b2a26d9d8e108d87cf5..db9bfc4c7a599bd8de27f294111f0f4532141f33 100644 (file)
@@ -984,6 +984,37 @@ class TestCephadm(object):
             out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json()
             assert out == HostSpec('test', 'test').to_json()
 
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    def test_dont_touch_offline_or_maintenance_host_daemons(self, cephadm_module):
+        # Daemons on offline/maintenance hosts must not be removed when specs are applied,
+        # and no new daemons may be placed on hosts in either of those states.
+        with with_host(cephadm_module, 'test1'):
+            with with_host(cephadm_module, 'test2'):
+                with with_host(cephadm_module, 'test3'):
+                    with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*'))):
+                        # host_pattern='*' covers all three hosts, so expect one mgr per host
+                        # (presumably with_service applies the spec itself -- TODO confirm)
+                        assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3
+
+                        # mark test2 as offline and test3 as in maintenance, then persist the inventory
+                        cephadm_module.inventory._inventory['test2']['status'] = 'offline'
+                        cephadm_module.inventory._inventory['test3']['status'] = 'maintenance'
+                        cephadm_module.inventory.save()
+
+                        # hosts in offline/maintenance state must not be offered as
+                        # scheduling candidates
+                        candidates = [
+                            h.hostname for h in cephadm_module._hosts_with_daemon_inventory()]
+                        assert 'test2' not in candidates
+                        assert 'test3' not in candidates
+
+                        with with_service(cephadm_module, ServiceSpec('crash', placement=PlacementSpec(host_pattern='*'))):
+                            # re-apply all services: the three mgr daemons must survive, and the
+                            # crash daemon must land only on test1, the one host still available
+                            CephadmServe(cephadm_module)._apply_all_services()
+                            assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3
+                            assert len(cephadm_module.cache.get_daemons_by_type('crash')) == 1
+
     def test_stale_connections(self, cephadm_module):
         class Connection(object):
             """