mgr/cephadm: add utils class for tracking special host labels
author Adam King <adking@redhat.com>
Wed, 22 Feb 2023 19:07:58 +0000 (14:07 -0500)
committer Adam King <adking@redhat.com>
Fri, 15 Mar 2024 19:21:35 +0000 (15:21 -0400)
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit 0e90c7e097c4dafafbb6b669949c2b1ea8de25c8)

Conflicts:
src/pybind/mgr/cephadm/inventory.py

src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/migrations.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/osd.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/utils.py

src/pybind/mgr/cephadm/inventory.py
index a829925bb964422b0b1c147869fbc5f7f872c8a2..7153ca6dcde37e34bfba87a932b04acfc8263fe8 100644
@@ -17,7 +17,7 @@ from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
 from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 
-from .utils import resolve_ip
+from .utils import resolve_ip, SpecialHostLabels
 from .migrations import queue_migrate_nfs_spec, queue_migrate_rgw_spec
 
 if TYPE_CHECKING:
@@ -1003,56 +1003,60 @@ class HostCache():
             h for h in self.mgr.inventory.all_specs()
             if (
                 self.host_had_daemon_refresh(h.hostname)
-                and '_no_schedule' not in h.labels
+                and SpecialHostLabels.DRAIN_DAEMONS not in h.labels
             )
         ]
 
     def get_conf_keyring_available_hosts(self) -> List[HostSpec]:
         """
-        Returns all hosts without _no_conf_keyring label that
-        have had a refresh
+        Returns all hosts without the drain conf and keyrings
+        label (SpecialHostLabels.DRAIN_CONF_KEYRING) that have
+        had a refresh. That is equivalent to all hosts we
+        consider eligible for deployment of conf and keyring files
 
         Any host without that label is considered fair game for
-        a client keyring spec to match. We want to still wait
-        for refresh here so that we know what keyrings we've
+        a client keyring spec to match. However, we still want to
+        wait for refresh here so that we know what keyrings we've
         already deployed here
         """
         return [
             h for h in self.mgr.inventory.all_specs()
             if (
                 self.host_had_daemon_refresh(h.hostname)
-                and '_no_conf_keyring' not in h.labels
+                and SpecialHostLabels.DRAIN_CONF_KEYRING not in h.labels
             )
         ]
 
     def get_non_draining_hosts(self) -> List[HostSpec]:
         """
-        Returns all hosts that do not have _no_schedule label.
+        Returns all hosts that do not have the drain daemons label
+        (SpecialHostLabels.DRAIN_DAEMONS).
 
         Useful for the agent who needs this specific list rather than the
         schedulable_hosts since the agent needs to be deployed on hosts with
         no daemon refresh
         """
         return [
-            h for h in self.mgr.inventory.all_specs() if '_no_schedule' not in h.labels
+            h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_DAEMONS not in h.labels
         ]
 
     def get_draining_hosts(self) -> List[HostSpec]:
         """
-        Returns all hosts that have _no_schedule label and therefore should have
-        no daemons placed on them, but are potentially still reachable
+        Returns all hosts that have the drain daemons label (SpecialHostLabels.DRAIN_DAEMONS)
+        and therefore should have no daemons placed on them, but are potentially still reachable
         """
         return [
-            h for h in self.mgr.inventory.all_specs() if '_no_schedule' in h.labels
+            h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_DAEMONS in h.labels
         ]
 
     def get_conf_keyring_draining_hosts(self) -> List[HostSpec]:
         """
-        Returns all hosts that have _no_conf_keyring label and therefore should have
-        no config files or client keyring placed on them, but are potentially still reachable
+        Returns all hosts that have the drain conf and keyrings label (SpecialHostLabels.DRAIN_CONF_KEYRING)
+        and therefore should have no config files or client keyring placed on them, but are
+        potentially still reachable
         """
         return [
-            h for h in self.mgr.inventory.all_specs() if '_no_conf_keyring' in h.labels
+            h for h in self.mgr.inventory.all_specs() if SpecialHostLabels.DRAIN_CONF_KEYRING in h.labels
         ]
 
     def get_unreachable_hosts(self) -> List[HostSpec]:
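
Note: the comprehensions above keep comparing enum members directly against HostSpec.labels, which holds plain strings. That works because SpecialHostLabels (added in utils.py at the end of this diff) mixes str into Enum, so members compare equal to their raw label strings. A minimal sketch of that behavior, independent of cephadm:

    from enum import Enum

    class SpecialHostLabels(str, Enum):
        DRAIN_DAEMONS: str = '_no_schedule'
        DRAIN_CONF_KEYRING: str = '_no_conf_keyring'

    labels = ['my_label', '_no_schedule']   # HostSpec.labels holds plain strings
    # str equality makes membership tests work unchanged:
    assert SpecialHostLabels.DRAIN_DAEMONS in labels
    assert SpecialHostLabels.DRAIN_CONF_KEYRING not in labels
    assert SpecialHostLabels.DRAIN_DAEMONS == '_no_schedule'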
src/pybind/mgr/cephadm/migrations.py
index 52a8605bc1d148199b83b464fcd7fe961dd2283b..27f777af6b420a341ec4768980b499b0c229bb04 100644
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Iterator, Optional, Dict, Any, List
 
 from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec, RGWSpec
 from cephadm.schedule import HostAssignment
+from cephadm.utils import SpecialHostLabels
 import rados
 
 from mgr_module import NFS_POOL_NAME
@@ -308,7 +309,7 @@ class Migrations:
         if 'client.admin' not in self.mgr.keys.keys:
             self.mgr._client_keyring_set(
                 entity='client.admin',
-                placement='label:_admin',
+                placement=f'label:{SpecialHostLabels.ADMIN}',
             )
         return True
 
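Note: the interpolation above, f'label:{SpecialHostLabels.ADMIN}', relies on format() of a str-mixin enum falling through to str.__format__, which yields the raw value on the Python versions this branch targets; Python 3.11 changed Enum.__format__ for mixed-in enums, so spelling out .value is the version-proof variant. A sketch, assuming nothing beyond the stdlib:

    from enum import Enum

    class SpecialHostLabels(str, Enum):
        ADMIN: str = '_admin'

    # On Python <= 3.10, format() of a str-mixin enum uses str.__format__,
    # so f-strings interpolate the raw value; 3.11 changed Enum.__format__
    # to match __str__, which would render 'SpecialHostLabels.ADMIN' instead.
    # Spelling out .value behaves the same on every version:
    placement = f'label:{SpecialHostLabels.ADMIN.value}'
    assert placement == 'label:_admin'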
src/pybind/mgr/cephadm/module.py
index 5c39ae2c526f9dfecd6c85ccb4561d38b019ceec..b8fdeba593fe9427c57eb8cbdd57f5d5267df40b 100644
@@ -65,7 +65,7 @@ from .inventory import Inventory, SpecStore, HostCache, AgentCache, EventStore,
 from .upgrade import CephadmUpgrade
 from .template import TemplateMgr
 from .utils import CEPH_IMAGE_TYPES, RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES, forall_hosts, \
-    cephadmNoImage, CEPH_UPGRADE_ORDER
+    cephadmNoImage, CEPH_UPGRADE_ORDER, SpecialHostLabels
 from .configchecks import CephadmConfigChecks
 from .offline_watcher import OfflineHostWatcher
 from .tuned_profiles import TunedProfileUtils
@@ -1587,11 +1587,11 @@ Then run the following:
 
         # check if we're removing the last _admin host
         if not force:
-            p = PlacementSpec(label='_admin')
+            p = PlacementSpec(label=SpecialHostLabels.ADMIN)
             admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs())
             if len(admin_hosts) == 1 and admin_hosts[0] == host:
-                raise OrchestratorValidationError(f"Host {host} is the last host with the '_admin'"
-                                                  " label. Please add the '_admin' label to a host"
+                raise OrchestratorValidationError(f"Host {host} is the last host with the '{SpecialHostLabels.ADMIN}'"
+                                                  f" label. Please add the '{SpecialHostLabels.ADMIN}' label to a host"
                                                   " or add --force to this command")
 
         def run_cmd(cmd_args: dict) -> None:
@@ -1676,14 +1676,14 @@ Then run the following:
     def remove_host_label(self, host: str, label: str, force: bool = False) -> str:
         # if we remove the _admin label from the only host that has it we could end up
         # removing the only instance of the config and keyring and cause issues
-        if not force and label == '_admin':
-            p = PlacementSpec(label='_admin')
+        if not force and label == SpecialHostLabels.ADMIN:
+            p = PlacementSpec(label=SpecialHostLabels.ADMIN)
             admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs())
             if len(admin_hosts) == 1 and admin_hosts[0] == host:
-                raise OrchestratorValidationError(f"Host {host} is the last host with the '_admin'"
-                                                  " label.\nRemoving the _admin label from this host could cause the removal"
+                raise OrchestratorValidationError(f"Host {host} is the last host with the '{SpecialHostLabels.ADMIN}'"
+                                                  f" label.\nRemoving the {SpecialHostLabels.ADMIN} label from this host could cause the removal"
                                                   " of the last cluster config/keyring managed by cephadm.\n"
-                                                  "It is recommended to add the _admin label to another host"
+                                                  f"It is recommended to add the {SpecialHostLabels.ADMIN} label to another host"
                                                   " before completing this operation.\nIf you're certain this is"
                                                   " what you want rerun this command with --force.")
         self.inventory.rm_label(host, label)
@@ -3198,19 +3198,19 @@ Then run the following:
         # if we drain the last admin host we could end up removing the only instance
         # of the config and keyring and cause issues
         if not force:
-            p = PlacementSpec(label='_admin')
+            p = PlacementSpec(label=SpecialHostLabels.ADMIN)
             admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs())
             if len(admin_hosts) == 1 and admin_hosts[0] == hostname:
-                raise OrchestratorValidationError(f"Host {hostname} is the last host with the '_admin'"
+                raise OrchestratorValidationError(f"Host {hostname} is the last host with the '{SpecialHostLabels.ADMIN}'"
                                                   " label.\nDraining this host could cause the removal"
                                                   " of the last cluster config/keyring managed by cephadm.\n"
-                                                  "It is recommended to add the _admin label to another host"
+                                                  f"It is recommended to add the {SpecialHostLabels.ADMIN} label to another host"
                                                   " before completing this operation.\nIf you're certain this is"
                                                   " what you want rerun this command with --force.")
 
         self.add_host_label(hostname, '_no_schedule')
         if not keep_conf_keyring:
-            self.add_host_label(hostname, '_no_conf_keyring')
+            self.add_host_label(hostname, SpecialHostLabels.DRAIN_CONF_KEYRING)
 
         daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname)
 
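Note: the three module.py hunks above repeat one guard: resolve the hosts carrying the admin label and refuse the destructive operation if the target host is the only one, unless --force is given. A condensed sketch of that shared shape (is_last_admin_host is a hypothetical helper, not cephadm API; the real code goes through PlacementSpec.filter_matching_hostspecs):

    from typing import List

    def is_last_admin_host(admin_hosts: List[str], target: str) -> bool:
        # True when `target` is the only host carrying the '_admin' label,
        # i.e. removing, relabeling, or draining it would strip the last
        # cephadm-managed cluster config and keyring.
        return len(admin_hosts) == 1 and admin_hosts[0] == target

    assert is_last_admin_host(['host1'], 'host1')
    assert not is_last_admin_host(['host1', 'host2'], 'host1')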
src/pybind/mgr/cephadm/serve.py
index fe565da327f756e9cb6d84203b2478e875774cdc..0014e5421239a7a4bb69262f0c6cb6727fd7de43 100644
@@ -26,7 +26,7 @@ from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 from cephadm.schedule import HostAssignment
 from cephadm.autotune import MemoryAutotuner
 from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \
-    CephadmNoImage, CEPH_TYPES, ContainerInspectInfo
+    CephadmNoImage, CEPH_TYPES, ContainerInspectInfo, SpecialHostLabels
 from mgr_module import MonCommandFailed
 from mgr_util import format_bytes
 
@@ -266,7 +266,7 @@ class CephadmServe:
 
             if (
                     self.mgr.cache.host_needs_autotune_memory(host)
-                    and not self.mgr.inventory.has_label(host, '_no_autotune_memory')
+                    and not self.mgr.inventory.has_label(host, SpecialHostLabels.NO_MEMORY_AUTOTUNE)
             ):
                 self.log.debug(f"autotuning memory for {host}")
                 self._autotune_host_memory(host)
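
Note: the gate above only autotunes a host when the cache says tuning is due and the operator has not opted the host out via the label. A minimal sketch of the opt-out pattern (inventory and needs_autotune are hypothetical stand-ins for the Inventory/HostCache state):

    from enum import Enum
    from typing import Dict, List

    class SpecialHostLabels(str, Enum):
        NO_MEMORY_AUTOTUNE: str = '_no_autotune_memory'

    inventory: Dict[str, List[str]] = {
        'host1': [],
        'host2': ['_no_autotune_memory'],   # operator opted this host out
    }

    def should_autotune(host: str, needs_autotune: bool) -> bool:
        # hypothetical stand-in for the cache freshness + label check above
        return (needs_autotune
                and SpecialHostLabels.NO_MEMORY_AUTOTUNE not in inventory.get(host, []))

    assert should_autotune('host1', needs_autotune=True)
    assert not should_autotune('host2', needs_autotune=True)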
src/pybind/mgr/cephadm/services/osd.py
index d777d7fc6fe43c4381571da439ec6ae01176f88e..7b333b534ca626e28e4b053546ffbb71372c3917 100644
@@ -13,6 +13,7 @@ from ceph.utils import datetime_to_str, str_to_datetime
 from datetime import datetime
 import orchestrator
 from cephadm.serve import CephadmServe
+from cephadm.utils import SpecialHostLabels
 from ceph.utils import datetime_now
 from orchestrator import OrchestratorError, DaemonDescription
 from mgr_module import MonCommandFailed
@@ -42,7 +43,7 @@ class OSDService(CephService):
                     host, drive_group))
                 return None
             # skip this host if we cannot schedule here
-            if self.mgr.inventory.has_label(host, '_no_schedule'):
+            if self.mgr.inventory.has_label(host, SpecialHostLabels.DRAIN_DAEMONS):
                 return None
 
             osd_id_claims_for_host = osd_id_claims.filtered_by_host(host)
src/pybind/mgr/cephadm/tests/test_cephadm.py
index 58c119a829b5bf4f19c3e37fb799dc45d92eaed6..55a0dd02a36ea800b682fc94d9e70a8be8dc9baf 100644
@@ -10,6 +10,7 @@ from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
 from cephadm.serve import CephadmServe
 from cephadm.inventory import HostCacheStatus, ClientKeyringSpec
 from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims
+from cephadm.utils import SpecialHostLabels
 
 try:
     from typing import List
@@ -2003,8 +2004,8 @@ osd_k2 = osd_v2
 
     def test_client_keyrings_special_host_labels(self, cephadm_module):
         cephadm_module.inventory.add_host(HostSpec('host1', labels=['keyring1']))
-        cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', '_no_schedule']))
-        cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', '_no_schedule', '_no_conf_keyring']))
+        cephadm_module.inventory.add_host(HostSpec('host2', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS]))
+        cephadm_module.inventory.add_host(HostSpec('host3', labels=['keyring1', SpecialHostLabels.DRAIN_DAEMONS, SpecialHostLabels.DRAIN_CONF_KEYRING]))
         # hosts need to be marked as having had refresh to be available for placement
         # so "refresh" with empty daemon list
         cephadm_module.cache.update_host_daemons('host1', {})
@@ -2231,12 +2232,12 @@ Traceback (most recent call last):
     def test_host_rm_last_admin(self, cephadm_module: CephadmOrchestrator):
         with pytest.raises(OrchestratorError):
             with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=False):
-                cephadm_module.inventory.add_label('test', '_admin')
+                cephadm_module.inventory.add_label('test', SpecialHostLabels.ADMIN)
                 pass
             assert False
         with with_host(cephadm_module, 'test1', refresh_hosts=False, rm_with_force=True):
             with with_host(cephadm_module, 'test2', refresh_hosts=False, rm_with_force=False):
-                cephadm_module.inventory.add_label('test2', '_admin')
+                cephadm_module.inventory.add_label('test2', SpecialHostLabels.ADMIN)
 
     @pytest.mark.parametrize("facts, settings, expected_value",
                              [
src/pybind/mgr/cephadm/utils.py
index 658e3a5ec940947caa3ecbda6e73b629ee16d16d..063fb19c6b9f85c72a5e648e8f18485e7926d8c2 100644
@@ -42,6 +42,16 @@ class ContainerInspectInfo(NamedTuple):
     repo_digests: Optional[List[str]]
 
 
+class SpecialHostLabels(str, Enum):
+    ADMIN: str = '_admin'
+    NO_MEMORY_AUTOTUNE: str = '_no_autotune_memory'
+    DRAIN_DAEMONS: str = '_no_schedule'
+    DRAIN_CONF_KEYRING: str = '_no_conf_keyring'
+
+    def to_json(self) -> str:
+        return self.value
+
+
 def name_to_config_section(name: str) -> ConfEntity:
     """
     Map from daemon names to ceph entity names (as seen in config)
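
Note: the new SpecialHostLabels enum gives every special label string a single definition, and to_json() hands serializers the plain value. A small usage sketch, assuming only the stdlib json module:

    import json
    from enum import Enum

    class SpecialHostLabels(str, Enum):
        DRAIN_DAEMONS: str = '_no_schedule'

        def to_json(self) -> str:
            return self.value

    # Because the mixin is str, json.dumps can serialize members directly;
    # to_json() keeps the plain-string form explicit for cephadm's encoders.
    assert json.dumps([SpecialHostLabels.DRAIN_DAEMONS.to_json()]) == '["_no_schedule"]'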