mgr/cephadm: allow draining host without removing conf/keyring files
author     Adam King <adking@redhat.com>
           Tue, 21 Feb 2023 18:53:32 +0000 (13:53 -0500)
committer  Adam King <adking@redhat.com>
           Fri, 15 Mar 2024 19:20:51 +0000 (15:20 -0400)
Fixes: https://tracker.ceph.com/issues/58820
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit 871aefb11d0a736d66150fee40c213f4210fead4)

doc/cephadm/host-management.rst
src/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py

diff --git a/doc/cephadm/host-management.rst b/doc/cephadm/host-management.rst
index 396ceb22d8900636d2271a020ee2c35ac44e08f5..5a5c838a94516d043435535f4aaf7e1c1c871fcf 100644
@@ -101,6 +101,17 @@ To drain all daemons from a host, run a command of the following form:
 The ``_no_schedule`` and ``_no_conf_keyring`` labels will be applied to the
 host. See :ref:`cephadm-special-host-labels`.
 
+If you want to drain daemons but leave the managed ``ceph.conf`` and keyring
+files on the host, you may pass the ``--keep-conf-keyring`` flag to the
+drain command.
+
+.. prompt:: bash #
+
+   ceph orch host drain *<host>* --keep-conf-keyring
+
+This will apply the ``_no_schedule`` label to the host but not the
+``_no_conf_keyring`` label.
+
@@ -192,6 +203,12 @@ The following host labels have a special meaning to cephadm.  All start with ``_
   an existing host that already contains Ceph daemons, it will cause cephadm to move
   those daemons elsewhere (except OSDs, which are not removed automatically).
 
+* ``_no_conf_keyring``: *Do not deploy config files or keyrings on this host*.
+
+  This label is effectively the same as ``_no_schedule``, but instead of applying
+  to daemons it applies to the client keyrings and ceph conf files that are
+  managed by cephadm.
+
 * ``_no_autotune_memory``: *Do not autotune memory on this host*.
 
   This label will prevent daemon memory from being tuned even when the
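
The special label can also be applied or removed on its own with the existing
label commands, independent of a drain (host name illustrative):

    ceph orch host label add host2 _no_conf_keyring
    ceph orch host label rm host2 _no_conf_keyring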
diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py
index e21fb2ff2f11c1e42b3d6ef25577adfe3617e072..755b7b0a51e7e9fcd21f26dcfa67e1ddeb159dc6 100644
@@ -300,6 +300,7 @@ class TestCephAdm(object):
     @mock.patch('cephadm.migrate_sysctl_dir')
     @mock.patch('cephadm.check_unit', lambda *args, **kwargs: (None, 'running', None))
     @mock.patch('cephadm.get_unit_name', lambda *args, **kwargs: 'mon-unit-name')
+    @mock.patch('cephadm.extract_uid_gid', lambda *args, **kwargs: (167, 167))
     @mock.patch('cephadm.get_deployment_container')
     def test_mon_crush_location(self, _get_deployment_container, _migrate_sysctl, _make_var_run, _get_parm, _deploy_daemon, _file_lock, _logger):
         """
diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py
index 5cc18b6f485312fd99aca0dd293564aa7a4e2aee..a829925bb964422b0b1c147869fbc5f7f872c8a2 100644
@@ -1007,6 +1007,24 @@ class HostCache():
             )
         ]
 
+    def get_conf_keyring_available_hosts(self) -> List[HostSpec]:
+        """
+        Returns all hosts without the _no_conf_keyring label that
+        have had a daemon refresh.
+
+        Any host without that label is considered fair game for
+        a client keyring spec to match. We still want to wait for
+        a refresh here so that we know which keyrings have already
+        been deployed.
+        """
+        return [
+            h for h in self.mgr.inventory.all_specs()
+            if (
+                self.host_had_daemon_refresh(h.hostname)
+                and '_no_conf_keyring' not in h.labels
+            )
+        ]
+
     def get_non_draining_hosts(self) -> List[HostSpec]:
         """
         Returns all hosts that do not have _no_schedule label.
@@ -1028,6 +1046,15 @@ class HostCache():
             h for h in self.mgr.inventory.all_specs() if '_no_schedule' in h.labels
         ]
 
+    def get_conf_keyring_draining_hosts(self) -> List[HostSpec]:
+        """
+        Returns all hosts that have the _no_conf_keyring label and therefore should have
+        no config files or client keyrings placed on them, but are potentially still reachable
+        """
+        return [
+            h for h in self.mgr.inventory.all_specs() if '_no_conf_keyring' in h.labels
+        ]
+
     def get_unreachable_hosts(self) -> List[HostSpec]:
         """
         Return all hosts that are offline or in maintenance mode.
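
A minimal, self-contained sketch (not the cephadm API; host names and labels
are illustrative) of how these two helpers partition the inventory for
conf/keyring placement:

    from typing import Dict, List, Set

    def conf_keyring_available(hosts: Dict[str, Set[str]], refreshed: Set[str]) -> List[str]:
        # mirrors get_conf_keyring_available_hosts(): refreshed and not opted out
        return [h for h, labels in hosts.items()
                if h in refreshed and '_no_conf_keyring' not in labels]

    def conf_keyring_draining(hosts: Dict[str, Set[str]]) -> List[str]:
        # mirrors get_conf_keyring_draining_hosts(): opted out, possibly still reachable
        return [h for h, labels in hosts.items() if '_no_conf_keyring' in labels]

    hosts = {
        'host1': set(),                                    # fully schedulable
        'host2': {'_no_schedule'},                         # drained, keeps conf/keyring files
        'host3': {'_no_schedule', '_no_conf_keyring'},     # drained, files removed as well
    }
    refreshed = {'host1', 'host2', 'host3'}
    assert conf_keyring_available(hosts, refreshed) == ['host1', 'host2']
    assert conf_keyring_draining(hosts) == ['host3']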
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index 08f28d3456f74b4d6800fc79116ec7c9098e6393..5c39ae2c526f9dfecd6c85ccb4561d38b019ceec 100644
@@ -3189,8 +3189,7 @@ Then run the following:
         return self.to_remove_osds.all_osds()
 
     @handle_orch_error
-    def drain_host(self, hostname, force=False):
-        # type: (str, bool) -> str
+    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> str:
         """
         Drain all daemons from a host.
         :param hostname: host name
@@ -3210,6 +3209,8 @@ Then run the following:
                                                   " what you want rerun this command with --force.")
 
         self.add_host_label(hostname, '_no_schedule')
+        if not keep_conf_keyring:
+            self.add_host_label(hostname, '_no_conf_keyring')
 
         daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname)
 
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
index b2467da7316088b883872d53528a4291b40d7383..fe565da327f756e9cb6d84203b2478e875774cdc 100644
@@ -1113,9 +1113,9 @@ class CephadmServe:
                 pspec = PlacementSpec.from_string(self.mgr.manage_etc_ceph_ceph_conf_hosts)
                 ha = HostAssignment(
                     spec=ServiceSpec('mon', placement=pspec),
-                    hosts=self.mgr.cache.get_schedulable_hosts(),
+                    hosts=self.mgr.cache.get_conf_keyring_available_hosts(),
                     unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
-                    draining_hosts=self.mgr.cache.get_draining_hosts(),
+                    draining_hosts=self.mgr.cache.get_conf_keyring_draining_hosts(),
                     daemons=[],
                     networks=self.mgr.cache.networks,
                 )
@@ -1144,9 +1144,9 @@ class CephadmServe:
                     keyring.encode('utf-8')).digest())
                 ha = HostAssignment(
                     spec=ServiceSpec('mon', placement=ks.placement),
-                    hosts=self.mgr.cache.get_schedulable_hosts(),
+                    hosts=self.mgr.cache.get_conf_keyring_available_hosts(),
                     unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
-                    draining_hosts=self.mgr.cache.get_draining_hosts(),
+                    draining_hosts=self.mgr.cache.get_conf_keyring_draining_hosts(),
                     daemons=[],
                     networks=self.mgr.cache.networks,
                 )
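
Hedged, condensed sketch of the end-to-end effect on client files: a host that
matches a keyring's placement gets the file unless it carries _no_conf_keyring,
so _no_schedule alone no longer excludes it. The file path mirrors the test
below; in the real code it is derived from the keyring entity.

    from typing import Dict, Set

    def calc_client_files(hosts: Dict[str, Set[str]], keyring_label: str = 'keyring1') -> Dict[str, Dict[str, str]]:
        files: Dict[str, Dict[str, str]] = {}
        for hostname, labels in hosts.items():
            if '_no_conf_keyring' in labels:
                continue  # opted out of managed conf/keyring files
            if keyring_label in labels:
                files.setdefault(hostname, {})['/etc/ceph/ceph.keyring1.keyring'] = 'real-keyring'
        return files

    hosts = {
        'host1': {'keyring1'},
        'host2': {'keyring1', '_no_schedule'},
        'host3': {'keyring1', '_no_schedule', '_no_conf_keyring'},
    }
    assert sorted(calc_client_files(hosts)) == ['host1', 'host2']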
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 8e09eb9400116679093bbc5b7eb10c05ec756587..58c119a829b5bf4f19c3e37fb799dc45d92eaed6 100644
@@ -8,7 +8,7 @@ import pytest
 
 from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
 from cephadm.serve import CephadmServe
-from cephadm.inventory import HostCacheStatus
+from cephadm.inventory import HostCacheStatus, ClientKeyringSpec
 from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims
 
 try:
@@ -2001,6 +2001,35 @@ osd_k2 = osd_v2
 
         assert cephadm_module.get_minimal_ceph_conf() == expected_combined_conf
 
+    def test_client_keyrings_special_host_labels(self, cephadm_module):
+        cephadm_module.inventory.add_host(HostSpec('host1', labels=['keyring1']))
+        cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', '_no_schedule']))
+        cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', '_no_schedule', '_no_conf_keyring']))
+        # hosts need to be marked as having had refresh to be available for placement
+        # so "refresh" with empty daemon list
+        cephadm_module.cache.update_host_daemons('host1', {})
+        cephadm_module.cache.update_host_daemons('host2', {})
+        cephadm_module.cache.update_host_daemons('host3', {})
+
+        assert 'host1' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+        assert 'host2' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+        assert 'host3' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+
+        assert 'host1' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+        assert 'host2' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+        assert 'host3' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+
+        cephadm_module.keys.update(ClientKeyringSpec('keyring1', PlacementSpec(label='keyring1')))
+
+        with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
+            _mon_cmd.return_value = (0, 'real-keyring', '')
+            client_files = CephadmServe(cephadm_module)._calc_client_files()
+            assert 'host1' in client_files.keys()
+            assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host1'].keys()
+            assert 'host2' in client_files.keys()
+            assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host2'].keys()
+            assert 'host3' not in client_files.keys()
+
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_registry_login(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         def check_registry_credentials(url, username, password):
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py
index a58143807d1807b3d98c5b9ddbd47ef46dffb0bb..02618d60db7a82033f7f72d32d47b094eab87163 100644
@@ -355,7 +355,7 @@ class Orchestrator(object):
         """
         raise NotImplementedError()
 
-    def drain_host(self, hostname: str, force: bool = False) -> OrchResult[str]:
+    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> OrchResult[str]:
         """
         drain all daemons from a host
 
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py
index 4da96ab6d762bed90555a7ab92e603db9dbd0842..3206827f4506d35d59a28c0b9ef2b6dbab7732b0 100644
@@ -451,9 +451,9 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
         return HandleCommandResult(stdout=completion.result_str())
 
     @_cli_write_command('orch host drain')
-    def _drain_host(self, hostname: str, force: bool = False) -> HandleCommandResult:
+    def _drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> HandleCommandResult:
         """drain all daemons from a host"""
-        completion = self.drain_host(hostname, force)
+        completion = self.drain_host(hostname, force, keep_conf_keyring)
         raise_if_exception(completion)
         return HandleCommandResult(stdout=completion.result_str())
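
End-to-end usage of the new flag (host name illustrative); the boolean keyword
on the handler surfaces as the --keep-conf-keyring CLI flag documented above:

    ceph orch host drain host2 --keep-conf-keyring
    ceph orch host ls --format yaml   # host2 now carries _no_schedule but not _no_conf_keyring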