git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: add utils class for tracking special host labels
author Adam King <adking@redhat.com>
Wed, 22 Feb 2023 19:07:58 +0000 (14:07 -0500)
committer Adam King <adking@redhat.com>
Wed, 23 Aug 2023 22:08:11 +0000 (18:08 -0400)
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit 0e90c7e097c4dafafbb6b669949c2b1ea8de25c8)

src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/migrations.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/osd.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/utils.py
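
The change across the files listed above is mechanical: each hard-coded special label string is replaced with the matching member of the new SpecialHostLabels class added to utils.py. As a quick orientation, a condensed sketch of the call-site pattern (the "host" variable name here is only illustrative):

    # before: bare label strings scattered across the modules
    if '_no_schedule' in host.labels:
        ...   # e.g. skip scheduling daemons on this host

    # after: named members of SpecialHostLabels (see the utils.py hunk below)
    if SpecialHostLabels.DRAIN_DAEMONS in host.labels:
        ...   # same check, now self-documenting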

index deb1349798912e695ad5bc9d1aca84df205bc11f..32fdbcf2f5f269fb9acef724e0ab6540fce4823f 100644 (file)
@@ -17,7 +17,7 @@ from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
 from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 
-from .utils import resolve_ip
+from .utils import resolve_ip, SpecialHostLabels
 from .migrations import queue_migrate_nfs_spec, queue_migrate_rgw_spec
 
 if TYPE_CHECKING:
@@ -1003,56 +1003,60 @@ class HostCache():
             h for h in self.mgr.inventory.all_specs()
             if (
                 self.host_had_daemon_refresh(h.hostname)
-                and '_no_schedule' not in h.labels
+                and SpecialHostLabels.DRAIN_DAEMONS not in h.labels
             )
         ]
 
     def get_conf_keyring_available_hosts(self) -> List[HostSpec]:
         """
-        Returns all hosts without _no_conf_keyring label that
-        have had a refresh
+        Returns all hosts without the drain conf/keyring
+        label (SpecialHostLabels.DRAIN_CONF_KEYRING) that have
+        had a refresh. That is, all hosts we consider
+        eligible for deployment of conf and keyring files.
 
         Any host without that label is considered fair game for
-        a client keyring spec to match. We want to still wait
-        for refresh here so that we know what keyrings we've
+        a client keyring spec to match. However, we still want to
+        wait for refresh here so that we know which keyrings we've
         already deployed here
         """
         return [
             h for h in self.mgr.inventory.all_specs()
             if (
                 self.host_had_daemon_refresh(h.hostname)
-                and '_no_conf_keyring' not in h.labels
+                and SpecialHostLabels.DRAIN_CONF_KEYRING not in h.labels
             )
         ]
 
     def get_non_draining_hosts(self) -> List[HostSpec]:
         """
-        Returns all hosts that do not have _no_schedule label.
+        Returns all hosts that do not have the drain daemons label
+        (SpecialHostLabels.DRAIN_DAEMONS).
 
         Useful for the agent who needs this specific list rather than the
         schedulable_hosts since the agent needs to be deployed on hosts with
         no daemon refresh
         """
         return [
-            h for h in self.mgr.inventory.all_specs() if '_no_schedule' not in h.labels
+            h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_DAEMONS not in h.labels
         ]
 
     def get_draining_hosts(self) -> List[HostSpec]:
         """
-        Returns all hosts that have _no_schedule label and therefore should have
-        no daemons placed on them, but are potentially still reachable
+        Returns all hosts that have the drain daemons label (SpecialHostLabels.DRAIN_DAEMONS)
+        and therefore should have no daemons placed on them, but are potentially still reachable
         """
         return [
-            h for h in self.mgr.inventory.all_specs() if '_no_schedule' in h.labels
+            h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_DAEMONS in h.labels
         ]
 
     def get_conf_keyring_draining_hosts(self) -> List[HostSpec]:
         """
-        Returns all hosts that have _no_conf_keyring label and therefore should have
-        no config files or client keyring placed on them, but are potentially still reachable
+        Returns all hosts that have the drain conf/keyring label (SpecialHostLabels.DRAIN_CONF_KEYRING)
+        and therefore should have no config files or client keyring placed on them, but are
+        potentially still reachable
         """
         return [
-            h for h in self.mgr.inventory.all_specs() if '_no_conf_keyring' in h.labels
+            h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_CONF_KEYRING in h.labels
         ]
 
     def get_unreachable_hosts(self) -> List[HostSpec]:
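
Because SpecialHostLabels mixes in str, its members compare equal to the plain label strings stored in HostSpec.labels, so the membership checks above behave exactly like the old string literals. A minimal standalone sketch (not ceph code) of the semantics being relied on:

    from enum import Enum

    class SpecialHostLabels(str, Enum):
        DRAIN_DAEMONS: str = '_no_schedule'

    labels = ['my_label', '_no_schedule']   # host labels are stored as plain strings
    assert SpecialHostLabels.DRAIN_DAEMONS == '_no_schedule'   # str equality compares by value
    assert SpecialHostLabels.DRAIN_DAEMONS in labels           # list membership uses ==, so this matches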
index 52a8605bc1d148199b83b464fcd7fe961dd2283b..27f777af6b420a341ec4768980b499b0c229bb04 100644 (file)
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Iterator, Optional, Dict, Any, List
 
 from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec, RGWSpec
 from cephadm.schedule import HostAssignment
+from cephadm.utils import SpecialHostLabels
 import rados
 
 from mgr_module import NFS_POOL_NAME
@@ -308,7 +309,7 @@ class Migrations:
         if 'client.admin' not in self.mgr.keys.keys:
             self.mgr._client_keyring_set(
                 entity='client.admin',
-                placement='label:_admin',
+                placement=f'label:{SpecialHostLabels.ADMIN}',
             )
         return True
 
index 5518ddc52cae3d53075d374d746168e37fd96bd2..3fa6b4d8bd3e78ecc6e5afc0afc48add6e84675d 100644 (file)
@@ -69,7 +69,7 @@ from .inventory import Inventory, SpecStore, HostCache, AgentCache, EventStore,
 from .upgrade import CephadmUpgrade
 from .template import TemplateMgr
 from .utils import CEPH_IMAGE_TYPES, RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES, forall_hosts, \
-    cephadmNoImage, CEPH_UPGRADE_ORDER
+    cephadmNoImage, CEPH_UPGRADE_ORDER, SpecialHostLabels
 from .configchecks import CephadmConfigChecks
 from .offline_watcher import OfflineHostWatcher
 from .tuned_profiles import TunedProfileUtils
@@ -1640,11 +1640,11 @@ Then run the following:
 
         # check if we're removing the last _admin host
         if not force:
-            p = PlacementSpec(label='_admin')
+            p = PlacementSpec(label=SpecialHostLabels.ADMIN)
             admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs())
             if len(admin_hosts) == 1 and admin_hosts[0] == host:
-                raise OrchestratorValidationError(f"Host {host} is the last host with the '_admin'"
-                                                  " label. Please add the '_admin' label to a host"
+                raise OrchestratorValidationError(f"Host {host} is the last host with the '{SpecialHostLabels.ADMIN}'"
+                                                  f" label. Please add the '{SpecialHostLabels.ADMIN}' label to a host"
                                                   " or add --force to this command")
 
         def run_cmd(cmd_args: dict) -> None:
@@ -1729,14 +1729,14 @@ Then run the following:
     def remove_host_label(self, host: str, label: str, force: bool = False) -> str:
         # if we remove the _admin label from the only host that has it we could end up
         # removing the only instance of the config and keyring and cause issues
-        if not force and label == '_admin':
-            p = PlacementSpec(label='_admin')
+        if not force and label == SpecialHostLabels.ADMIN:
+            p = PlacementSpec(label=SpecialHostLabels.ADMIN)
             admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs())
             if len(admin_hosts) == 1 and admin_hosts[0] == host:
-                raise OrchestratorValidationError(f"Host {host} is the last host with the '_admin'"
-                                                  " label.\nRemoving the _admin label from this host could cause the removal"
+                raise OrchestratorValidationError(f"Host {host} is the last host with the '{SpecialHostLabels.ADMIN}'"
+                                                  f" label.\nRemoving the {SpecialHostLabels.ADMIN} label from this host could cause the removal"
                                                   " of the last cluster config/keyring managed by cephadm.\n"
-                                                  "It is recommended to add the _admin label to another host"
+                                                  f"It is recommended to add the {SpecialHostLabels.ADMIN} label to another host"
                                                   " before completing this operation.\nIf you're certain this is"
                                                   " what you want rerun this command with --force.")
         self.inventory.rm_label(host, label)
@@ -3305,19 +3305,19 @@ Then run the following:
         # if we drain the last admin host we could end up removing the only instance
         # of the config and keyring and cause issues
         if not force:
-            p = PlacementSpec(label='_admin')
+            p = PlacementSpec(label=SpecialHostLabels.ADMIN)
             admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs())
             if len(admin_hosts) == 1 and admin_hosts[0] == hostname:
-                raise OrchestratorValidationError(f"Host {hostname} is the last host with the '_admin'"
+                raise OrchestratorValidationError(f"Host {hostname} is the last host with the '{SpecialHostLabels.ADMIN}'"
                                                   " label.\nDraining this host could cause the removal"
                                                   " of the last cluster config/keyring managed by cephadm.\n"
-                                                  "It is recommended to add the _admin label to another host"
+                                                  f"It is recommended to add the {SpecialHostLabels.ADMIN} label to another host"
                                                   " before completing this operation.\nIf you're certain this is"
                                                   " what you want rerun this command with --force.")
 
         self.add_host_label(hostname, '_no_schedule')
         if not keep_conf_keyring:
-            self.add_host_label(hostname, '_no_conf_keyring')
+            self.add_host_label(hostname, SpecialHostLabels.DRAIN_CONF_KEYRING)
 
         daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname)
 
index b26d57805f37a8f70a97ec3f484ca431186b2251..de78b6b7f34d900aef39fc4d56f7461bb0164cb7 100644 (file)
@@ -20,7 +20,7 @@ from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 from cephadm.schedule import HostAssignment
 from cephadm.autotune import MemoryAutotuner
 from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \
-    CephadmNoImage, CEPH_TYPES, ContainerInspectInfo
+    CephadmNoImage, CEPH_TYPES, ContainerInspectInfo, SpecialHostLabels
 from mgr_module import MonCommandFailed
 from mgr_util import format_bytes, verify_tls, get_cert_issuer_info, ServerConfigException
 
@@ -295,7 +295,7 @@ class CephadmServe:
 
             if (
                     self.mgr.cache.host_needs_autotune_memory(host)
-                    and not self.mgr.inventory.has_label(host, '_no_autotune_memory')
+                    and not self.mgr.inventory.has_label(host, SpecialHostLabels.NO_MEMORY_AUTOTUNE)
             ):
                 self.log.debug(f"autotuning memory for {host}")
                 self._autotune_host_memory(host)
index c8c8ac1a9b27e0ffc40e6f258cf34678860b837d..bfecc57230abc61fe312762470caa46a87734f77 100644 (file)
@@ -13,6 +13,7 @@ from ceph.utils import datetime_to_str, str_to_datetime
 from datetime import datetime
 import orchestrator
 from cephadm.serve import CephadmServe
+from cephadm.utils import SpecialHostLabels
 from ceph.utils import datetime_now
 from orchestrator import OrchestratorError, DaemonDescription
 from mgr_module import MonCommandFailed
@@ -42,7 +43,7 @@ class OSDService(CephService):
                     host, drive_group))
                 return None
             # skip this host if we cannot schedule here
-            if self.mgr.inventory.has_label(host, '_no_schedule'):
+            if self.mgr.inventory.has_label(host, SpecialHostLabels.DRAIN_DAEMONS):
                 return None
 
             osd_id_claims_for_host = osd_id_claims.filtered_by_host(host)
index 611f6020d67b6a06f1aea68ebc281ad8995bdb1d..8dd88f9f208141f71b51c068f1753eacf0cdcff0 100644 (file)
@@ -10,6 +10,7 @@ from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
 from cephadm.serve import CephadmServe
 from cephadm.inventory import HostCacheStatus, ClientKeyringSpec
 from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims
+from cephadm.utils import SpecialHostLabels
 
 try:
     from typing import List
@@ -1921,8 +1922,8 @@ osd_k2 = osd_v2
 
     def test_client_keyrings_special_host_labels(self, cephadm_module):
         cephadm_module.inventory.add_host(HostSpec('host1', labels=['keyring1']))
-        cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', '_no_schedule']))
-        cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', '_no_schedule', '_no_conf_keyring']))
+        cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS]))
+        cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS, SpecialHostLabels.DRAIN_CONF_KEYRING]))
         # hosts need to be marked as having had refresh to be available for placement
         # so "refresh" with empty daemon list
         cephadm_module.cache.update_host_daemons('host1', {})
@@ -2149,12 +2150,12 @@ Traceback (most recent call last):
     def test_host_rm_last_admin(self, cephadm_module: CephadmOrchestrator):
         with pytest.raises(OrchestratorError):
             with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=False):
-                cephadm_module.inventory.add_label('test', '_admin')
+                cephadm_module.inventory.add_label('test', SpecialHostLabels.ADMIN)
                 pass
             assert False
         with with_host(cephadm_module, 'test1', refresh_hosts=False, rm_with_force=True):
             with with_host(cephadm_module, 'test2', refresh_hosts=False, rm_with_force=False):
-                cephadm_module.inventory.add_label('test2', '_admin')
+                cephadm_module.inventory.add_label('test2', SpecialHostLabels.ADMIN)
 
     @pytest.mark.parametrize("facts, settings, expected_value",
                              [
index 6f8e022d62f0ae71df008e5cf3e7f5be76199d8e..091c378147d8b793079a7e91488d031ee3e90436 100644 (file)
@@ -43,6 +43,16 @@ class ContainerInspectInfo(NamedTuple):
     repo_digests: Optional[List[str]]
 
 
+class SpecialHostLabels(str, Enum):
+    ADMIN: str = '_admin'
+    NO_MEMORY_AUTOTUNE: str = '_no_autotune_memory'
+    DRAIN_DAEMONS: str = '_no_schedule'
+    DRAIN_CONF_KEYRING: str = '_no_conf_keyring'
+
+    def to_json(self) -> str:
+        return self.value
+
+
 def name_to_config_section(name: str) -> ConfEntity:
     """
     Map from daemon names to ceph entity names (as seen in config)
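
To round out the new class above, a small standalone snippet showing why to_json returns the bare string and how a raw label maps back to an enum member; it mirrors the definition in the utils.py hunk rather than importing it:

    from enum import Enum

    class SpecialHostLabels(str, Enum):
        ADMIN: str = '_admin'
        DRAIN_DAEMONS: str = '_no_schedule'

        def to_json(self) -> str:
            return self.value

    assert SpecialHostLabels.ADMIN.to_json() == '_admin'   # serializes as the plain label string
    assert SpecialHostLabels('_no_schedule') is SpecialHostLabels.DRAIN_DAEMONS   # lookup by value round-trips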