mgr/cephadm: allow draining host without removing conf/keyring files
author Adam King <adking@redhat.com>
Tue, 21 Feb 2023 18:53:32 +0000 (13:53 -0500)
committer Adam King <adking@redhat.com>
Tue, 15 Aug 2023 20:17:28 +0000 (16:17 -0400)
Fixes: https://tracker.ceph.com/issues/58820
Signed-off-by: Adam King <adking@redhat.com>
doc/cephadm/host-management.rst
src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py

doc/cephadm/host-management.rst
index 3f12ec1ce6beb04fd00972327c4ef0263788f4a6..2bf60388efc8cad76057dce0f1716f9be1b95894 100644
@@ -101,8 +101,19 @@ To drain all daemons from a host, run a command of the following form:
 
    ceph orch host drain *<host>*
 
-The ``_no_schedule`` label will be applied to the host. See
-:ref:`cephadm-special-host-labels`.
+The ``_no_schedule`` and ``_no_conf_keyring`` labels will be applied to the
+host. See :ref:`cephadm-special-host-labels`.
+
+If you only want to drain daemons, leaving cephadm-managed ``ceph.conf`` and
+keyring files in place on the host, you may pass the ``--keep-conf-keyring``
+flag to the drain command:
+
+.. prompt:: bash #
+
+   ceph orch host drain *<host>* --keep-conf-keyring
+
+This will apply the ``_no_schedule`` label to the host but not the
+``_no_conf_keyring`` label.
 
 All OSDs on the host will be scheduled to be removed. You can check the progress of the OSD removal operation with the following command:
 
@@ -183,6 +194,12 @@ The following host labels have a special meaning to cephadm.  All start with ``_
   an existing host that already contains Ceph daemons, it will cause cephadm to move
   those daemons elsewhere (except OSDs, which are not removed automatically).
 
+* ``_no_conf_keyring``: *Do not deploy config files or keyrings on this host*.
+
+  This label is effectively the same as ``_no_schedule`` but, instead of applying
+  to daemons, it applies to the client keyrings and ``ceph.conf`` files that are
+  managed by cephadm.
+
 * ``_no_autotune_memory``: *Do not autotune memory on this host*.
 
   This label will prevent daemon memory from being tuned even when the
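
For reference, the relationship between the new drain flag and the two special labels described above reduces to a small decision. The following is a simplified Python sketch, not the cephadm implementation; the helper name labels_for_drain is hypothetical, while the label names and the flag come from the change itself.

# Simplified sketch of which special labels `ceph orch host drain` applies.
# The helper name is hypothetical; the label names and flag are real.
from typing import List

def labels_for_drain(keep_conf_keyring: bool = False) -> List[str]:
    labels = ['_no_schedule']              # always: stop scheduling daemons here
    if not keep_conf_keyring:
        labels.append('_no_conf_keyring')  # also stop managing conf/keyring files
    return labels

assert labels_for_drain() == ['_no_schedule', '_no_conf_keyring']
assert labels_for_drain(keep_conf_keyring=True) == ['_no_schedule']
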
src/pybind/mgr/cephadm/inventory.py
index 5cc18b6f485312fd99aca0dd293564aa7a4e2aee..a829925bb964422b0b1c147869fbc5f7f872c8a2 100644
@@ -1007,6 +1007,24 @@ class HostCache():
             )
         ]
 
+    def get_conf_keyring_available_hosts(self) -> List[HostSpec]:
+        """
+        Returns all hosts that do not have the _no_conf_keyring label
+        and have had a daemon refresh
+
+        Any host without that label is considered fair game for
+        a client keyring spec to match. We still wait for the
+        refresh so that we know which keyrings have already been
+        deployed on the host
+        """
+        return [
+            h for h in self.mgr.inventory.all_specs()
+            if (
+                self.host_had_daemon_refresh(h.hostname)
+                and '_no_conf_keyring' not in h.labels
+            )
+        ]
+
     def get_non_draining_hosts(self) -> List[HostSpec]:
         """
         Returns all hosts that do not have _no_schedule label.
@@ -1028,6 +1046,15 @@ class HostCache():
             h for h in self.mgr.inventory.all_specs() if '_no_schedule' in h.labels
         ]
 
+    def get_conf_keyring_draining_hosts(self) -> List[HostSpec]:
+        """
+        Returns all hosts that have the _no_conf_keyring label and therefore should have
+        no config files or client keyrings placed on them, but are potentially still reachable
+        """
+        return [
+            h for h in self.mgr.inventory.all_specs() if '_no_conf_keyring' in h.labels
+        ]
+
     def get_unreachable_hosts(self) -> List[HostSpec]:
         """
         Return all hosts that are offline or in maintenance mode.
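
To make the behaviour of the two new HostCache helpers concrete, here is a self-contained sketch of the same filtering using a simplified stand-in for HostSpec and the daemon-refresh check; none of this is the real cephadm code.

# Stand-alone illustration of the filtering done by the two new helpers.
# HostSpec/refreshed are simplified stand-ins for the real inventory types.
from typing import List, NamedTuple

class HostSpec(NamedTuple):
    hostname: str
    labels: List[str]
    refreshed: bool  # stand-in for host_had_daemon_refresh()

def conf_keyring_available_hosts(hosts: List[HostSpec]) -> List[HostSpec]:
    # eligible to receive managed conf/keyring files
    return [h for h in hosts
            if h.refreshed and '_no_conf_keyring' not in h.labels]

def conf_keyring_draining_hosts(hosts: List[HostSpec]) -> List[HostSpec]:
    # labelled hosts: no managed conf/keyring files should be placed here
    return [h for h in hosts if '_no_conf_keyring' in h.labels]

hosts = [
    HostSpec('host1', [], True),
    HostSpec('host2', ['_no_schedule'], True),
    HostSpec('host3', ['_no_schedule', '_no_conf_keyring'], True),
]
assert [h.hostname for h in conf_keyring_available_hosts(hosts)] == ['host1', 'host2']
assert [h.hostname for h in conf_keyring_draining_hosts(hosts)] == ['host3']
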
src/pybind/mgr/cephadm/module.py
index d022b8e293d832db77076d29b4368e41cdf83a4f..0b1283a882a2967d19848a71dd950038277314ba 100644
@@ -3332,8 +3332,7 @@ Then run the following:
         return self.to_remove_osds.all_osds()
 
     @handle_orch_error
-    def drain_host(self, hostname, force=False):
-        # type: (str, bool) -> str
+    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> str:
         """
         Drain all daemons from a host.
         :param host: host name
@@ -3353,6 +3352,8 @@ Then run the following:
                                                   " what you want rerun this command with --force.")
 
         self.add_host_label(hostname, '_no_schedule')
+        if not keep_conf_keyring:
+            self.add_host_label(hostname, '_no_conf_keyring')
 
         daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname)
 
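Since draining now adds both labels, reversing a drain amounts to removing them again. A rough sketch under the assumption that the orchestrator interface's remove_host_label and raise_if_exception are used; the undrain_host helper itself is hypothetical, and any OSD removals already queued by the drain are not reverted by this.

# Hypothetical helper: reverse a drain by removing the special labels.
# remove_host_label() and raise_if_exception() come from the orchestrator
# interface; the helper itself is not part of this change.
from orchestrator import Orchestrator, raise_if_exception

def undrain_host(orch: Orchestrator, hostname: str) -> None:
    # make the host schedulable again and let cephadm manage its
    # conf/keyring files on the next serve loop
    for label in ('_no_schedule', '_no_conf_keyring'):
        raise_if_exception(orch.remove_host_label(hostname, label))
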
src/pybind/mgr/cephadm/serve.py
index 14d02610ae3582241ab0d101abfea7014c8a08c8..07e02850ecd29565ee8d21fabaebeeede0948b53 100644
@@ -1133,9 +1133,9 @@ class CephadmServe:
                 pspec = PlacementSpec.from_string(self.mgr.manage_etc_ceph_ceph_conf_hosts)
                 ha = HostAssignment(
                     spec=ServiceSpec('mon', placement=pspec),
-                    hosts=self.mgr.cache.get_schedulable_hosts(),
+                    hosts=self.mgr.cache.get_conf_keyring_available_hosts(),
                     unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
-                    draining_hosts=self.mgr.cache.get_draining_hosts(),
+                    draining_hosts=self.mgr.cache.get_conf_keyring_draining_hosts(),
                     daemons=[],
                     networks=self.mgr.cache.networks,
                 )
@@ -1164,9 +1164,9 @@ class CephadmServe:
                     keyring.encode('utf-8')).digest())
                 ha = HostAssignment(
                     spec=ServiceSpec('mon', placement=ks.placement),
-                    hosts=self.mgr.cache.get_schedulable_hosts(),
+                    hosts=self.mgr.cache.get_conf_keyring_available_hosts(),
                     unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
-                    draining_hosts=self.mgr.cache.get_draining_hosts(),
+                    draining_hosts=self.mgr.cache.get_conf_keyring_draining_hosts(),
                     daemons=[],
                     networks=self.mgr.cache.networks,
                 )
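
The net effect of the serve.py change is that conf/keyring placement is keyed off the _no_conf_keyring label rather than _no_schedule, so a host drained with --keep-conf-keyring still receives managed files. A minimal sketch of that outcome, illustrative only and not the real _calc_client_files logic:

# Illustrative only: which hosts end up with managed client files after
# this change.  Maps hostname -> set of labels, mirroring the unit test below.
from typing import Dict, List, Set

def hosts_receiving_client_files(hosts: Dict[str, Set[str]]) -> List[str]:
    # a host drained with --keep-conf-keyring carries only _no_schedule,
    # so it still gets a managed ceph.conf and client keyrings
    return sorted(name for name, labels in hosts.items()
                  if '_no_conf_keyring' not in labels)

hosts = {
    'host1': set(),                                   # normal host
    'host2': {'_no_schedule'},                        # drained --keep-conf-keyring
    'host3': {'_no_schedule', '_no_conf_keyring'},    # fully drained
}
assert hosts_receiving_client_files(hosts) == ['host1', 'host2']
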
src/pybind/mgr/cephadm/tests/test_cephadm.py
index 82a46ac84850cd0144de9d5948f8dd6e56d63b6f..aa38c8b66d0a7f7ddd2b5ba1213f419dfb8438ed 100644
@@ -8,7 +8,7 @@ import pytest
 
 from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
 from cephadm.serve import CephadmServe
-from cephadm.inventory import HostCacheStatus
+from cephadm.inventory import HostCacheStatus, ClientKeyringSpec
 from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims
 
 try:
@@ -2103,6 +2103,35 @@ osd_k2 = osd_v2
 
         assert cephadm_module.get_minimal_ceph_conf() == expected_combined_conf
 
+    def test_client_keyrings_special_host_labels(self, cephadm_module):
+        cephadm_module.inventory.add_host(HostSpec('host1', labels=['keyring1']))
+        cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', '_no_schedule']))
+        cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', '_no_schedule', '_no_conf_keyring']))
+        # hosts need to be marked as having had a refresh to be available for placement
+        # so "refresh" with empty daemon list
+        cephadm_module.cache.update_host_daemons('host1', {})
+        cephadm_module.cache.update_host_daemons('host2', {})
+        cephadm_module.cache.update_host_daemons('host3', {})
+
+        assert 'host1' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+        assert 'host2' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+        assert 'host3' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+
+        assert 'host1' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+        assert 'host2' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+        assert 'host3' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+
+        cephadm_module.keys.update(ClientKeyringSpec('keyring1', PlacementSpec(label='keyring1')))
+
+        with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
+            _mon_cmd.return_value = (0, 'real-keyring', '')
+            client_files = CephadmServe(cephadm_module)._calc_client_files()
+            assert 'host1' in client_files.keys()
+            assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host1'].keys()
+            assert 'host2' in client_files.keys()
+            assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host2'].keys()
+            assert 'host3' not in client_files.keys()
+
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_registry_login(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         def check_registry_credentials(url, username, password):
src/pybind/mgr/orchestrator/_interface.py
index b1410b29dacc0f4d310e8ad7a752d39543ac416e..3ce81255432dab2c9ff3fc25253c65cdac524179 100644
@@ -367,7 +367,7 @@ class Orchestrator(object):
         """
         raise NotImplementedError()
 
-    def drain_host(self, hostname: str, force: bool = False) -> OrchResult[str]:
+    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> OrchResult[str]:
         """
         drain all daemons from a host
 
src/pybind/mgr/orchestrator/module.py
index d2f9416b6079385da06f914585bb23e0166e9135..a4ca7704f041a9a8055ca2cc4887b0f3b43a9fc5 100644
@@ -478,9 +478,9 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
         return HandleCommandResult(stdout=completion.result_str())
 
     @_cli_write_command('orch host drain')
-    def _drain_host(self, hostname: str, force: bool = False) -> HandleCommandResult:
+    def _drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> HandleCommandResult:
         """drain all daemons from a host"""
-        completion = self.drain_host(hostname, force)
+        completion = self.drain_host(hostname, force, keep_conf_keyring)
         raise_if_exception(completion)
         return HandleCommandResult(stdout=completion.result_str())