From df79f0b7808b6bed3f26af4df2ac2e0d31de07ea Mon Sep 17 00:00:00 2001
From: Adam King
Date: Tue, 21 Feb 2023 13:53:32 -0500
Subject: [PATCH] mgr/cephadm: allow draining host without removing conf/keyring files

Fixes: https://tracker.ceph.com/issues/58820

Signed-off-by: Adam King
(cherry picked from commit 871aefb11d0a736d66150fee40c213f4210fead4)
---
 doc/cephadm/host-management.rst              | 17 +++++++++++
 src/cephadm/tests/test_cephadm.py            |  1 +
 src/pybind/mgr/cephadm/inventory.py          | 27 +++++++++++++++++
 src/pybind/mgr/cephadm/module.py             |  5 ++--
 src/pybind/mgr/cephadm/serve.py              |  8 ++---
 src/pybind/mgr/cephadm/tests/test_cephadm.py | 31 +++++++++++++++++++-
 src/pybind/mgr/orchestrator/_interface.py    |  2 +-
 src/pybind/mgr/orchestrator/module.py        |  4 +--
 8 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/doc/cephadm/host-management.rst b/doc/cephadm/host-management.rst
index 396ceb22d8900..5a5c838a94516 100644
--- a/doc/cephadm/host-management.rst
+++ b/doc/cephadm/host-management.rst
@@ -101,6 +101,17 @@ To drain all daemons from a host, run a command of the following form:
 The ``_no_schedule`` and ``_no_conf_keyring`` labels will be applied to the
 host. See :ref:`cephadm-special-host-labels`.
 
+If you only want to drain daemons but leave managed ceph conf and keyring
+files on the host, you may pass the ``--keep-conf-keyring`` flag to the
+drain command.
+
+.. prompt:: bash #
+
+   ceph orch host drain *<host-name>* --keep-conf-keyring
+
+This will apply the ``_no_schedule`` label to the host but not the
+``_no_conf_keyring`` label.
+
 If you want to drain daemons but leave managed `ceph.conf` and keyring
 files on the host, you may pass the ``--keep-conf-keyring`` flag to the
 drain command.
@@ -192,6 +203,12 @@ The following host labels have a special meaning to cephadm. All start with ``_
   an existing host that already contains Ceph daemons, it will cause cephadm
   to move those daemons elsewhere (except OSDs, which are not removed
   automatically).
 
+* ``_no_conf_keyring``: *Do not deploy config files or keyrings on this host*.
+
+  This label is effectively the same as ``_no_schedule``, but instead of working
+  for daemons it works for client keyrings and ceph conf files that are being
+  managed by cephadm.
+
 * ``_no_autotune_memory``: *Do not autotune memory on this host*.
 
   This label will prevent daemon memory from being tuned even when the
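
The documentation above boils down to one rule: draining a host always applies
the ``_no_schedule`` label, and additionally applies ``_no_conf_keyring`` unless
``--keep-conf-keyring`` is passed. The following minimal sketch restates that
rule in plain Python; the ``labels_after_drain`` helper and the bare set of
labels are illustrative assumptions, not part of the orchestrator API.

.. code-block:: python

   # Illustrative sketch only, not cephadm code: it restates the label rule
   # that `ceph orch host drain` follows after this change.
   from typing import Set


   def labels_after_drain(existing: Set[str], keep_conf_keyring: bool = False) -> Set[str]:
       """Return the host labels expected once a drain has been requested."""
       labels = set(existing)
       labels.add('_no_schedule')            # drain always blocks daemon scheduling
       if not keep_conf_keyring:
           labels.add('_no_conf_keyring')    # also stop managing conf/keyring files
       return labels


   # `ceph orch host drain host1`
   assert labels_after_drain({'rgw'}) == {'rgw', '_no_schedule', '_no_conf_keyring'}
   # `ceph orch host drain host1 --keep-conf-keyring`
   assert labels_after_drain({'rgw'}, keep_conf_keyring=True) == {'rgw', '_no_schedule'}
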
diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py
index e21fb2ff2f11c..755b7b0a51e7e 100644
--- a/src/cephadm/tests/test_cephadm.py
+++ b/src/cephadm/tests/test_cephadm.py
@@ -300,6 +300,7 @@ class TestCephAdm(object):
     @mock.patch('cephadm.migrate_sysctl_dir')
     @mock.patch('cephadm.check_unit', lambda *args, **kwargs: (None, 'running', None))
     @mock.patch('cephadm.get_unit_name', lambda *args, **kwargs: 'mon-unit-name')
+    @mock.patch('cephadm.extract_uid_gid', lambda *args, **kwargs: (167, 167))
     @mock.patch('cephadm.get_deployment_container')
     def test_mon_crush_location(self, _get_deployment_container, _migrate_sysctl, _make_var_run, _get_parm, _deploy_daemon, _file_lock, _logger):
         """
diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py
index 5cc18b6f48531..a829925bb9644 100644
--- a/src/pybind/mgr/cephadm/inventory.py
+++ b/src/pybind/mgr/cephadm/inventory.py
@@ -1007,6 +1007,24 @@ class HostCache():
             )
         ]
 
+    def get_conf_keyring_available_hosts(self) -> List[HostSpec]:
+        """
+        Returns all hosts without _no_conf_keyring label that
+        have had a refresh
+
+        Any host without that label is considered fair game for
+        a client keyring spec to match. We want to still wait
+        for refresh here so that we know what keyrings we've
+        already deployed here
+        """
+        return [
+            h for h in self.mgr.inventory.all_specs()
+            if (
+                self.host_had_daemon_refresh(h.hostname)
+                and '_no_conf_keyring' not in h.labels
+            )
+        ]
+
     def get_non_draining_hosts(self) -> List[HostSpec]:
         """
         Returns all hosts that do not have _no_schedule label.
@@ -1028,6 +1046,15 @@ class HostCache():
             h for h in self.mgr.inventory.all_specs() if '_no_schedule' in h.labels
         ]
 
+    def get_conf_keyring_draining_hosts(self) -> List[HostSpec]:
+        """
+        Returns all hosts that have _no_conf_keyring label and therefore should have
+        no config files or client keyring placed on them, but are potentially still reachable
+        """
+        return [
+            h for h in self.mgr.inventory.all_specs() if '_no_conf_keyring' in h.labels
+        ]
+
     def get_unreachable_hosts(self) -> List[HostSpec]:
         """
         Return all hosts that are offline or in maintenance mode.
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index 08f28d3456f74..5c39ae2c526f9 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -3189,8 +3188,7 @@ Then run the following:
         return self.to_remove_osds.all_osds()
 
     @handle_orch_error
-    def drain_host(self, hostname, force=False):
-        # type: (str, bool) -> str
+    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> str:
         """
         Drain all daemons from a host.
 
         :param host: host name
@@ -3210,6 +3209,8 @@ Then run the following:
                                " what you want rerun this command with --force.")
 
         self.add_host_label(hostname, '_no_schedule')
+        if not keep_conf_keyring:
+            self.add_host_label(hostname, '_no_conf_keyring')
 
         daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname)
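
The two new ``HostCache`` helpers mirror the existing ``get_schedulable_hosts()``
and ``get_draining_hosts()`` pair but key off the ``_no_conf_keyring`` label, and
``drain_host()`` now adds that label unless ``keep_conf_keyring`` is set. Below is
a standalone approximation of the filtering logic, assuming a simplified
``FakeHostSpec`` and a plain ``refreshed`` set in place of ``mgr.inventory`` and
``host_had_daemon_refresh()``.

.. code-block:: python

   # Standalone approximation of the new HostCache helpers; the real methods
   # consult self.mgr.inventory and host_had_daemon_refresh().
   from dataclasses import dataclass, field
   from typing import List, Set


   @dataclass
   class FakeHostSpec:
       hostname: str
       labels: List[str] = field(default_factory=list)


   def conf_keyring_available_hosts(specs: List[FakeHostSpec],
                                    refreshed: Set[str]) -> List[FakeHostSpec]:
       # Hosts eligible for managed conf/keyring files: refreshed at least once
       # and not carrying the _no_conf_keyring label.
       return [
           h for h in specs
           if h.hostname in refreshed and '_no_conf_keyring' not in h.labels
       ]


   def conf_keyring_draining_hosts(specs: List[FakeHostSpec]) -> List[FakeHostSpec]:
       # Hosts whose managed conf/keyring files should be removed.
       return [h for h in specs if '_no_conf_keyring' in h.labels]


   specs = [
       FakeHostSpec('host1'),
       FakeHostSpec('host2', ['_no_schedule']),                      # drained with --keep-conf-keyring
       FakeHostSpec('host3', ['_no_schedule', '_no_conf_keyring']),  # fully drained
   ]
   refreshed = {'host1', 'host2', 'host3'}
   assert [h.hostname for h in conf_keyring_available_hosts(specs, refreshed)] == ['host1', 'host2']
   assert [h.hostname for h in conf_keyring_draining_hosts(specs)] == ['host3']
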
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
index b2467da731608..fe565da327f75 100644
--- a/src/pybind/mgr/cephadm/serve.py
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -1113,9 +1113,9 @@ class CephadmServe:
             pspec = PlacementSpec.from_string(self.mgr.manage_etc_ceph_ceph_conf_hosts)
             ha = HostAssignment(
                 spec=ServiceSpec('mon', placement=pspec),
-                hosts=self.mgr.cache.get_schedulable_hosts(),
+                hosts=self.mgr.cache.get_conf_keyring_available_hosts(),
                 unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
-                draining_hosts=self.mgr.cache.get_draining_hosts(),
+                draining_hosts=self.mgr.cache.get_conf_keyring_draining_hosts(),
                 daemons=[],
                 networks=self.mgr.cache.networks,
             )
@@ -1144,9 +1144,9 @@ class CephadmServe:
                     keyring.encode('utf-8')).digest())
                 ha = HostAssignment(
                     spec=ServiceSpec('mon', placement=ks.placement),
-                    hosts=self.mgr.cache.get_schedulable_hosts(),
+                    hosts=self.mgr.cache.get_conf_keyring_available_hosts(),
                     unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
-                    draining_hosts=self.mgr.cache.get_draining_hosts(),
+                    draining_hosts=self.mgr.cache.get_conf_keyring_draining_hosts(),
                     daemons=[],
                     networks=self.mgr.cache.networks,
                 )
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 8e09eb9400116..58c119a829b5b 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -8,7 +8,7 @@ import pytest
 from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
 
 from cephadm.serve import CephadmServe
-from cephadm.inventory import HostCacheStatus
+from cephadm.inventory import HostCacheStatus, ClientKeyringSpec
 from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims
 
 try:
@@ -2001,6 +2001,35 @@ osd_k2 = osd_v2
 
         assert cephadm_module.get_minimal_ceph_conf() == expected_combined_conf
 
+    def test_client_keyrings_special_host_labels(self, cephadm_module):
+        cephadm_module.inventory.add_host(HostSpec('host1', labels=['keyring1']))
+        cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', '_no_schedule']))
+        cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', '_no_schedule', '_no_conf_keyring']))
+        # hosts need to be marked as having had refresh to be available for placement
+        # so "refresh" with empty daemon list
+        cephadm_module.cache.update_host_daemons('host1', {})
+        cephadm_module.cache.update_host_daemons('host2', {})
+        cephadm_module.cache.update_host_daemons('host3', {})
+
+        assert 'host1' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+        assert 'host2' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+        assert 'host3' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_available_hosts()]
+
+        assert 'host1' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+        assert 'host2' not in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+        assert 'host3' in [h.hostname for h in cephadm_module.cache.get_conf_keyring_draining_hosts()]
+
+        cephadm_module.keys.update(ClientKeyringSpec('keyring1', PlacementSpec(label='keyring1')))
+
+        with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
+            _mon_cmd.return_value = (0, 'real-keyring', '')
+            client_files = CephadmServe(cephadm_module)._calc_client_files()
+            assert 'host1' in client_files.keys()
+            assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host1'].keys()
+            assert 'host2' in client_files.keys()
+            assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host2'].keys()
+            assert 'host3' not in client_files.keys()
+
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_registry_login(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         def check_registry_credentials(url, username, password):
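
The new unit test pins down the end-to-end expectation: a host carrying only
``_no_schedule`` (for example, one drained with ``--keep-conf-keyring``) still
receives managed client files, while a host with ``_no_conf_keyring`` does not.
The sketch below condenses that expectation; the ``expected_client_files``
helper is illustrative, and the file-path pattern is copied from the test
rather than derived from cephadm's general naming rules.

.. code-block:: python

   # Condensed restatement of what test_client_keyrings_special_host_labels
   # asserts. `hosts` maps hostname -> labels; 'keyring1' is the client
   # keyring spec name used in the test.
   from typing import Dict, List


   def expected_client_files(hosts: Dict[str, List[str]], keyring_entity: str) -> Dict[str, List[str]]:
       path = f'/etc/ceph/ceph.{keyring_entity}.keyring'  # path asserted in the test
       return {
           name: [path]
           for name, labels in hosts.items()
           # the keyring spec matches on its label; _no_conf_keyring excludes the host
           if keyring_entity in labels and '_no_conf_keyring' not in labels
       }


   files = expected_client_files(
       {
           'host1': ['keyring1'],
           'host2': ['keyring1', '_no_schedule'],
           'host3': ['keyring1', '_no_schedule', '_no_conf_keyring'],
       },
       'keyring1',
   )
   assert set(files) == {'host1', 'host2'}
   assert files['host1'] == ['/etc/ceph/ceph.keyring1.keyring']
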
with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd: + _mon_cmd.return_value = (0, 'real-keyring', '') + client_files = CephadmServe(cephadm_module)._calc_client_files() + assert 'host1' in client_files.keys() + assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host1'].keys() + assert 'host2' in client_files.keys() + assert '/etc/ceph/ceph.keyring1.keyring' in client_files['host2'].keys() + assert 'host3' not in client_files.keys() + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") def test_registry_login(self, _run_cephadm, cephadm_module: CephadmOrchestrator): def check_registry_credentials(url, username, password): diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index a58143807d180..02618d60db7a8 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -355,7 +355,7 @@ class Orchestrator(object): """ raise NotImplementedError() - def drain_host(self, hostname: str, force: bool = False) -> OrchResult[str]: + def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> OrchResult[str]: """ drain all daemons from a host diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index 4da96ab6d762b..3206827f4506d 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -451,9 +451,9 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule, return HandleCommandResult(stdout=completion.result_str()) @_cli_write_command('orch host drain') - def _drain_host(self, hostname: str, force: bool = False) -> HandleCommandResult: + def _drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> HandleCommandResult: """drain all daemons from a host""" - completion = self.drain_host(hostname, force) + completion = self.drain_host(hostname, force, keep_conf_keyring) raise_if_exception(completion) return HandleCommandResult(stdout=completion.result_str()) -- 2.39.5