From 0e90c7e097c4dafafbb6b669949c2b1ea8de25c8 Mon Sep 17 00:00:00 2001 From: Adam King Date: Wed, 22 Feb 2023 14:07:58 -0500 Subject: [PATCH] mgr/cephadm: add utils class for tracking special host labels Signed-off-by: Adam King --- src/pybind/mgr/cephadm/inventory.py | 34 +++++++++++--------- src/pybind/mgr/cephadm/migrations.py | 3 +- src/pybind/mgr/cephadm/module.py | 26 +++++++-------- src/pybind/mgr/cephadm/serve.py | 4 +-- src/pybind/mgr/cephadm/services/osd.py | 3 +- src/pybind/mgr/cephadm/tests/test_cephadm.py | 9 +++--- src/pybind/mgr/cephadm/utils.py | 10 ++++++ 7 files changed, 53 insertions(+), 36 deletions(-) diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index a829925bb9644..7153ca6dcde37 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -17,7 +17,7 @@ from ceph.utils import str_to_datetime, datetime_to_str, datetime_now from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types from cephadm.services.cephadmservice import CephadmDaemonDeploySpec -from .utils import resolve_ip +from .utils import resolve_ip, SpecialHostLabels from .migrations import queue_migrate_nfs_spec, queue_migrate_rgw_spec if TYPE_CHECKING: @@ -1003,56 +1003,60 @@ class HostCache(): h for h in self.mgr.inventory.all_specs() if ( self.host_had_daemon_refresh(h.hostname) - and '_no_schedule' not in h.labels + and SpecialHostLabels.DRAIN_DAEMONS not in h.labels ) ] def get_conf_keyring_available_hosts(self) -> List[HostSpec]: """ - Returns all hosts without _no_conf_keyring label that - have had a refresh + Returns all hosts without the drain conf and keyrings + label (SpecialHostLabels.DRAIN_CONF_KEYRING) that have + had a refresh. That is equivalent to all hosts we + consider eligible for deployment of conf and keyring files Any host without that label is considered fair game for - a client keyring spec to match. We want to still wait - for refresh here so that we know what keyrings we've + a client keyring spec to match. However, we want to still + wait for refresh here so that we know what keyrings we've already deployed here """ return [ h for h in self.mgr.inventory.all_specs() if ( self.host_had_daemon_refresh(h.hostname) - and '_no_conf_keyring' not in h.labels + and SpecialHostLabels.DRAIN_CONF_KEYRING not in h.labels ) ] def get_non_draining_hosts(self) -> List[HostSpec]: """ - Returns all hosts that do not have _no_schedule label. + Returns all hosts that do not have drain daemon label + (SpecialHostLabels.DRAIN_DAEMONS). 
Useful for the agent who needs this specific list rather than the schedulable_hosts since the agent needs to be deployed on hosts with no daemon refresh """ return [ - h for h in self.mgr.inventory.all_specs() if '_no_schedule' not in h.labels + h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_DAEMONS not in h.labels ] def get_draining_hosts(self) -> List[HostSpec]: """ - Returns all hosts that have _no_schedule label and therefore should have - no daemons placed on them, but are potentially still reachable + Returns all hosts that have the drain daemons label (SpecialHostLabels.DRAIN_DAEMONS) + and therefore should have no daemons placed on them, but are potentially still reachable """ return [ - h for h in self.mgr.inventory.all_specs() if '_no_schedule' in h.labels + h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_DAEMONS in h.labels ] def get_conf_keyring_draining_hosts(self) -> List[HostSpec]: """ - Returns all hosts that have _no_conf_keyring label and therefore should have - no config files or client keyring placed on them, but are potentially still reachable + Returns all hosts that have drain conf and keyrings label (SpecialHostLabels.DRAIN_CONF_KEYRING) + and therefore should have no config files or client keyring placed on them, but are + potentially still reachable """ return [ - h for h in self.mgr.inventory.all_specs() if '_no_conf_keyring' in h.labels + h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_CONF_KEYRING in h.labels ] def get_unreachable_hosts(self) -> List[HostSpec]: diff --git a/src/pybind/mgr/cephadm/migrations.py b/src/pybind/mgr/cephadm/migrations.py index 52a8605bc1d14..27f777af6b420 100644 --- a/src/pybind/mgr/cephadm/migrations.py +++ b/src/pybind/mgr/cephadm/migrations.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Iterator, Optional, Dict, Any, List from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec, RGWSpec from cephadm.schedule import HostAssignment +from cephadm.utils import SpecialHostLabels import rados from mgr_module import NFS_POOL_NAME @@ -308,7 +309,7 @@ class Migrations: if 'client.admin' not in self.mgr.keys.keys: self.mgr._client_keyring_set( entity='client.admin', - placement='label:_admin', + placement=f'label:{SpecialHostLabels.ADMIN}', ) return True diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 0b1283a882a29..4a12894540835 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -70,7 +70,7 @@ from .inventory import Inventory, SpecStore, HostCache, AgentCache, EventStore, from .upgrade import CephadmUpgrade from .template import TemplateMgr from .utils import CEPH_IMAGE_TYPES, RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES, forall_hosts, \ - cephadmNoImage, CEPH_UPGRADE_ORDER + cephadmNoImage, CEPH_UPGRADE_ORDER, SpecialHostLabels from .configchecks import CephadmConfigChecks from .offline_watcher import OfflineHostWatcher from .tuned_profiles import TunedProfileUtils @@ -1631,11 +1631,11 @@ Then run the following: # check, if there we're removing the last _admin host if not force: - p = PlacementSpec(label='_admin') + p = PlacementSpec(label=SpecialHostLabels.ADMIN) admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs()) if len(admin_hosts) == 1 and admin_hosts[0] == host: - raise OrchestratorValidationError(f"Host {host} is the last host with the '_admin'" - " label. 
Please add the '_admin' label to a host" + raise OrchestratorValidationError(f"Host {host} is the last host with the '{SpecialHostLabels.ADMIN}'" + f" label. Please add the '{SpecialHostLabels.ADMIN}' label to a host" " or add --force to this command") def run_cmd(cmd_args: dict) -> None: @@ -1722,14 +1722,14 @@ Then run the following: def remove_host_label(self, host: str, label: str, force: bool = False) -> str: # if we remove the _admin label from the only host that has it we could end up # removing the only instance of the config and keyring and cause issues - if not force and label == '_admin': - p = PlacementSpec(label='_admin') + if not force and label == SpecialHostLabels.ADMIN: + p = PlacementSpec(label=SpecialHostLabels.ADMIN) admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs()) if len(admin_hosts) == 1 and admin_hosts[0] == host: - raise OrchestratorValidationError(f"Host {host} is the last host with the '_admin'" - " label.\nRemoving the _admin label from this host could cause the removal" + raise OrchestratorValidationError(f"Host {host} is the last host with the '{SpecialHostLabels.ADMIN}'" + f" label.\nRemoving the {SpecialHostLabels.ADMIN} label from this host could cause the removal" " of the last cluster config/keyring managed by cephadm.\n" - "It is recommended to add the _admin label to another host" + f"It is recommended to add the {SpecialHostLabels.ADMIN} label to another host" " before completing this operation.\nIf you're certain this is" " what you want rerun this command with --force.") if self.inventory.has_label(host, label): @@ -3341,19 +3341,19 @@ Then run the following: # if we drain the last admin host we could end up removing the only instance # of the config and keyring and cause issues if not force: - p = PlacementSpec(label='_admin') + p = PlacementSpec(label=SpecialHostLabels.ADMIN) admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs()) if len(admin_hosts) == 1 and admin_hosts[0] == hostname: - raise OrchestratorValidationError(f"Host {hostname} is the last host with the '_admin'" + raise OrchestratorValidationError(f"Host {hostname} is the last host with the '{SpecialHostLabels.ADMIN}'" " label.\nDraining this host could cause the removal" " of the last cluster config/keyring managed by cephadm.\n" - "It is recommended to add the _admin label to another host" + f"It is recommended to add the {SpecialHostLabels.ADMIN} label to another host" " before completing this operation.\nIf you're certain this is" " what you want rerun this command with --force.") self.add_host_label(hostname, '_no_schedule') if not keep_conf_keyring: - self.add_host_label(hostname, '_no_conf_keyring') + self.add_host_label(hostname, SpecialHostLabels.DRAIN_CONF_KEYRING) daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname) diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 07e02850ecd29..d013533bc749f 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -27,7 +27,7 @@ from cephadm.services.cephadmservice import CephadmDaemonDeploySpec from cephadm.schedule import HostAssignment from cephadm.autotune import MemoryAutotuner from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \ - CephadmNoImage, CEPH_TYPES, ContainerInspectInfo + CephadmNoImage, CEPH_TYPES, ContainerInspectInfo, SpecialHostLabels from mgr_module import MonCommandFailed from mgr_util import format_bytes, verify_tls, get_cert_issuer_info, ServerConfigException @@ 
-303,7 +303,7 @@ class CephadmServe: if ( self.mgr.cache.host_needs_autotune_memory(host) - and not self.mgr.inventory.has_label(host, '_no_autotune_memory') + and not self.mgr.inventory.has_label(host, SpecialHostLabels.NO_MEMORY_AUTOTUNE) ): self.log.debug(f"autotuning memory for {host}") self._autotune_host_memory(host) diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py index c8c8ac1a9b27e..bfecc57230abc 100644 --- a/src/pybind/mgr/cephadm/services/osd.py +++ b/src/pybind/mgr/cephadm/services/osd.py @@ -13,6 +13,7 @@ from ceph.utils import datetime_to_str, str_to_datetime from datetime import datetime import orchestrator from cephadm.serve import CephadmServe +from cephadm.utils import SpecialHostLabels from ceph.utils import datetime_now from orchestrator import OrchestratorError, DaemonDescription from mgr_module import MonCommandFailed @@ -42,7 +43,7 @@ class OSDService(CephService): host, drive_group)) return None # skip this host if we cannot schedule here - if self.mgr.inventory.has_label(host, '_no_schedule'): + if self.mgr.inventory.has_label(host, SpecialHostLabels.DRAIN_DAEMONS): return None osd_id_claims_for_host = osd_id_claims.filtered_by_host(host) diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index aa38c8b66d0a7..6cc06cedcf47a 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -10,6 +10,7 @@ from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection from cephadm.serve import CephadmServe from cephadm.inventory import HostCacheStatus, ClientKeyringSpec from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims +from cephadm.utils import SpecialHostLabels try: from typing import List @@ -2105,8 +2106,8 @@ osd_k2 = osd_v2 def test_client_keyrings_special_host_labels(self, cephadm_module): cephadm_module.inventory.add_host(HostSpec('host1', labels=['keyring1'])) - cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', '_no_schedule'])) - cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', '_no_schedule', '_no_conf_keyring'])) + cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS])) + cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS, SpecialHostLabels.DRAIN_CONF_KEYRING])) # hosts need to be marked as having had refresh to be available for placement # so "refresh" with empty daemon list cephadm_module.cache.update_host_daemons('host1', {}) @@ -2333,12 +2334,12 @@ Traceback (most recent call last): def test_host_rm_last_admin(self, cephadm_module: CephadmOrchestrator): with pytest.raises(OrchestratorError): with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=False): - cephadm_module.inventory.add_label('test', '_admin') + cephadm_module.inventory.add_label('test', SpecialHostLabels.ADMIN) pass assert False with with_host(cephadm_module, 'test1', refresh_hosts=False, rm_with_force=True): with with_host(cephadm_module, 'test2', refresh_hosts=False, rm_with_force=False): - cephadm_module.inventory.add_label('test2', '_admin') + cephadm_module.inventory.add_label('test2', SpecialHostLabels.ADMIN) @pytest.mark.parametrize("facts, settings, expected_value", [ diff --git a/src/pybind/mgr/cephadm/utils.py b/src/pybind/mgr/cephadm/utils.py index c68351a620d8d..e920179710ce7 100644 --- a/src/pybind/mgr/cephadm/utils.py +++ 
b/src/pybind/mgr/cephadm/utils.py @@ -43,6 +43,16 @@ class ContainerInspectInfo(NamedTuple): repo_digests: Optional[List[str]] +class SpecialHostLabels(str, Enum): + ADMIN: str = '_admin' + NO_MEMORY_AUTOTUNE: str = '_no_autotune_memory' + DRAIN_DAEMONS: str = '_no_schedule' + DRAIN_CONF_KEYRING: str = '_no_conf_keyring' + + def to_json(self) -> str: + return self.value + + def name_to_config_section(name: str) -> ConfEntity: """ Map from daemon names to ceph entity names (as seen in config) -- 2.39.5
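
Note for context (not part of the patch): below is a minimal, standalone sketch of the (str, Enum) behavior this change relies on. The host_labels list is a hypothetical stand-in for HostSpec.labels, and the class body paraphrases the one added to utils.py. Because each member is itself a str equal to its label value, the pre-existing membership and equality checks against plain label strings keep working unchanged, while .value / to_json() yield the bare string where one is required (for example when building a 'label:<name>' placement or serializing to JSON).

    # Standalone sketch, not Ceph code. 'host_labels' is a made-up stand-in
    # for HostSpec.labels; the enum paraphrases the class added in utils.py.
    from enum import Enum
    from typing import List


    class SpecialHostLabels(str, Enum):
        ADMIN = '_admin'
        NO_MEMORY_AUTOTUNE = '_no_autotune_memory'
        DRAIN_DAEMONS = '_no_schedule'
        DRAIN_CONF_KEYRING = '_no_conf_keyring'

        def to_json(self) -> str:
            return self.value


    host_labels: List[str] = ['keyring1', '_no_schedule']

    # Members compare equal to their raw label strings, so membership and
    # equality tests against plain strings behave as before:
    assert SpecialHostLabels.DRAIN_DAEMONS in host_labels
    assert SpecialHostLabels.DRAIN_CONF_KEYRING not in host_labels
    assert SpecialHostLabels.ADMIN == '_admin'

    # Where an actual str is needed (JSON output, placement strings), .value
    # or to_json() returns the bare label; str()/f-string rendering of
    # mixed-in enum members varies across Python versions, so the explicit
    # form is the safer choice:
    assert SpecialHostLabels.DRAIN_DAEMONS.to_json() == '_no_schedule'
    assert f'label:{SpecialHostLabels.ADMIN.value}' == 'label:_admin'

Mixing in str rather than using a bare Enum appears to be what lets existing call sites and stored specs that still pass the raw label strings (e.g. '_no_schedule') keep matching without any data migration.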