mgr/cephadm: move schedulable, unreachable and non_draining hosts to HostCache 43331/head
author Adam King <adking@redhat.com>
Thu, 30 Sep 2021 14:04:14 +0000 (10:04 -0400)
committer Adam King <adking@redhat.com>
Thu, 30 Sep 2021 14:04:14 +0000 (10:04 -0400)
so they can be accessed through the cache, similar to how we access
daemons with certain attributes, rather than being standalone
functions in the cephadm mgr module

Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/agent.py
src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/migrations.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/osd.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
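
The commit's net effect on call sites is that the three host lists are now read off the HostCache rather than from private methods on the mgr module. The fragment below is only an illustrative sketch of that access pattern, not code from the tree: the helper and its name are hypothetical, `mgr` is assumed to be the CephadmOrchestrator instance, and the HostSpec import path is assumed to match the one used elsewhere in cephadm.

from typing import List, Tuple

from ceph.deployment.hostspec import HostSpec


def host_lists(mgr: "CephadmOrchestrator") -> Tuple[List[HostSpec], List[HostSpec], List[HostSpec]]:
    # Hypothetical helper for illustration only. Before this commit these
    # lists came from mgr._schedulable_hosts(), mgr._non_draining_hosts()
    # and mgr._unreachable_hosts(); afterwards HostCache serves them.
    schedulable = mgr.cache.get_schedulable_hosts()    # refreshed hosts without _no_schedule
    non_draining = mgr.cache.get_non_draining_hosts()  # every host without _no_schedule
    unreachable = mgr.cache.get_unreachable_hosts()    # offline or maintenance hosts
    return schedulable, non_draining, unreachable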

index cd9cd199f114c0b26c16a4ef984e2fce1730f9b6..ead4c0a3bee5ace2f3ca7ab3b085d7a72832ce18 100644 (file)
@@ -297,7 +297,7 @@ class CephadmAgentHelpers:
     def _agent_down(self, host: str) -> bool:
         # if host is draining or drained (has _no_schedule label) there should not
         # be an agent deployed there and therefore we should return False
-        if host not in [h.hostname for h in self.mgr._non_draining_hosts()]:
+        if host not in [h.hostname for h in self.mgr.cache.get_non_draining_hosts()]:
             return False
         # if we don't have a timestamp, it's likely because of a mgr fail over.
         # just set the timestamp to now. However, if host was offline before, we
index a637f4ae0b9109e3a505b84d845e5380a1e79ad4..002aaa52060df6400dd9676c227f01bcb67366f8 100644 (file)
@@ -741,6 +741,52 @@ class HostCache():
             r.append(host)
         return r
 
+    def get_schedulable_hosts(self) -> List[HostSpec]:
+        """
+        Returns all usable hosts that went through _refresh_host_daemons().
+
+        This mitigates a potential race where a new host was added *after*
+        ``_refresh_host_daemons()`` was called, but *before*
+        ``_apply_all_specs()`` was called, leaving us with hosts where
+        daemons might be running but have not yet been detected.
+        """
+        return [
+            h for h in self.mgr.inventory.all_specs()
+            if (
+                self.host_had_daemon_refresh(h.hostname)
+                and '_no_schedule' not in h.labels
+            )
+        ]
+
+    def get_non_draining_hosts(self) -> List[HostSpec]:
+        """
+        Returns all hosts that do not have the _no_schedule label.
+
+        Useful for the agent, which needs this list rather than the
+        schedulable hosts because the agent must be deployed even on
+        hosts that have not yet had a daemon refresh.
+        """
+        return [
+            h for h in self.mgr.inventory.all_specs() if '_no_schedule' not in h.labels
+        ]
+
+    def get_unreachable_hosts(self) -> List[HostSpec]:
+        """
+        Return all hosts that are offline or in maintenance mode.
+
+        The idea is that we should not touch the daemons on these hosts
+        (in theory the hosts are inaccessible, so we can't touch them
+        anyway), but we still want to count their daemons toward
+        placement so they aren't simply redeployed elsewhere.
+        """
+        return [
+            h for h in self.mgr.inventory.all_specs()
+            if (
+                h.status.lower() in ['maintenance', 'offline']
+                or h.hostname in self.mgr.offline_hosts
+            )
+        ]
+
     def get_facts(self, host: str) -> Dict[str, Any]:
         return self.facts.get(host, {})
 
@@ -953,7 +999,7 @@ class HostCache():
         return True
 
     def all_host_metadata_up_to_date(self) -> bool:
-        unreachables = [h.hostname for h in self.mgr._unreachable_hosts()]
+        unreachables = [h.hostname for h in self.get_unreachable_hosts()]
         if [h for h in self.get_hosts() if (not self.host_metadata_up_to_date(h) and h not in unreachables)]:
             # this function is primarily for telling if it's safe to try and apply a service
             # spec. Since offline/maintenance hosts aren't considered in that process anyway
index 941ede3666de3ece6c64a3cd0e70ed9ed4b61d46..e5a73f306896e10379d976a9686c848da7c696fe 100644 (file)
@@ -103,7 +103,7 @@ class Migrations:
             placements, to_add, to_remove = HostAssignment(
                 spec=spec,
                 hosts=self.mgr.inventory.all_specs(),
-                unreachable_hosts=self.mgr._unreachable_hosts(),
+                unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
                 daemons=existing_daemons,
             ).place()
 
index 3cc37845754a946e6ef35a5fe8aa071bcf2593cd..0dc18c8daf15c01d479d2a8bc01f99bd8084e5e1 100644 (file)
@@ -1374,52 +1374,6 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
 
         return image
 
-    def _schedulable_hosts(self) -> List[HostSpec]:
-        """
-        Returns all usable hosts that went through _refresh_host_daemons().
-
-        This mitigates a potential race, where new host was added *after*
-        ``_refresh_host_daemons()`` was called, but *before*
-        ``_apply_all_specs()`` was called. thus we end up with a hosts
-        where daemons might be running, but we have not yet detected them.
-        """
-        return [
-            h for h in self.inventory.all_specs()
-            if (
-                self.cache.host_had_daemon_refresh(h.hostname)
-                and '_no_schedule' not in h.labels
-            )
-        ]
-
-    def _non_draining_hosts(self) -> List[HostSpec]:
-        """
-        Returns all hosts that do not have _no_schedule label.
-
-        Useful for the agent who needs this specific list rather than the
-        _schedulable_hosts since the agent needs to be deployed on hosts with
-        no daemon refresh
-        """
-        return [
-            h for h in self.inventory.all_specs() if '_no_schedule' not in h.labels
-        ]
-
-    def _unreachable_hosts(self) -> List[HostSpec]:
-        """
-        Return all hosts that are offline or in maintenance mode.
-
-        The idea is we should not touch the daemons on these hosts (since
-        in theory the hosts are inaccessible so we CAN'T touch them) but
-        we still want to count daemons that exist on these hosts toward the
-        placement so daemons on these hosts aren't just moved elsewhere
-        """
-        return [
-            h for h in self.inventory.all_specs()
-            if (
-                h.status.lower() in ['maintenance', 'offline']
-                or h.hostname in self.offline_hosts
-            )
-        ]
-
     def _check_valid_addr(self, host: str, addr: str) -> str:
         # make sure hostname is resolvable before trying to make a connection
         try:
@@ -1823,7 +1777,7 @@ Then run the following:
                 continue
             sm[nm] = orchestrator.ServiceDescription(
                 spec=spec,
-                size=spec.placement.get_target_count(self._schedulable_hosts()),
+                size=spec.placement.get_target_count(self.cache.get_schedulable_hosts()),
                 running=0,
                 events=self.events.get_for_service(spec.service_name()),
                 created=self.spec_store.spec_created[nm],
@@ -2400,8 +2354,8 @@ Then run the following:
         svc = self.cephadm_services[spec.service_type]
         ha = HostAssignment(
             spec=spec,
-            hosts=self._schedulable_hosts(),
-            unreachable_hosts=self._unreachable_hosts(),
+            hosts=self.cache.get_schedulable_hosts(),
+            unreachable_hosts=self.cache.get_unreachable_hosts(),
             networks=self.cache.networks,
             daemons=self.cache.get_daemons_by_service(spec.service_name()),
             allow_colo=svc.allow_colo(),
@@ -2477,7 +2431,7 @@ Then run the following:
         HostAssignment(
             spec=spec,
             hosts=self.inventory.all_specs(),  # All hosts, even those without daemon refresh
-            unreachable_hosts=self._unreachable_hosts(),
+            unreachable_hosts=self.cache.get_unreachable_hosts(),
             networks=self.cache.networks,
             daemons=self.cache.get_daemons_by_service(spec.service_name()),
             allow_colo=self.cephadm_services[spec.service_type].allow_colo(),
index ef15bab72b18084d91532b0b1a9d135a71d7d891..3559e687867d32586e72fbbbcbd8d1500d79a416 100644 (file)
@@ -209,8 +209,8 @@ class CephadmServe:
                 pspec = PlacementSpec.from_string(self.mgr.manage_etc_ceph_ceph_conf_hosts)
                 ha = HostAssignment(
                     spec=ServiceSpec('mon', placement=pspec),
-                    hosts=self.mgr._schedulable_hosts(),
-                    unreachable_hosts=self.mgr._unreachable_hosts(),
+                    hosts=self.mgr.cache.get_schedulable_hosts(),
+                    unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
                     daemons=[],
                     networks=self.mgr.cache.networks,
                 )
@@ -241,8 +241,8 @@ class CephadmServe:
                     keyring.encode('utf-8')).digest())
                 ha = HostAssignment(
                     spec=ServiceSpec('mon', placement=ks.placement),
-                    hosts=self.mgr._schedulable_hosts(),
-                    unreachable_hosts=self.mgr._unreachable_hosts(),
+                    hosts=self.mgr.cache.get_schedulable_hosts(),
+                    unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
                     daemons=[],
                     networks=self.mgr.cache.networks,
                 )
@@ -295,7 +295,7 @@ class CephadmServe:
                 if r is not None:
                     bad_hosts.append(r)
 
-            if not self.mgr.use_agent or host not in [h.hostname for h in self.mgr._non_draining_hosts()]:
+            if not self.mgr.use_agent or host not in [h.hostname for h in self.mgr.cache.get_non_draining_hosts()]:
                 if self.mgr.cache.host_needs_daemon_refresh(host):
                     self.log.debug('refreshing %s daemons' % host)
                     r = self._refresh_host_daemons(host)
@@ -717,8 +717,9 @@ class CephadmServe:
             rank_map = self.mgr.spec_store[spec.service_name()].rank_map or {}
         ha = HostAssignment(
             spec=spec,
-            hosts=self.mgr._non_draining_hosts() if spec.service_name() == 'agent' else self.mgr._schedulable_hosts(),
-            unreachable_hosts=self.mgr._unreachable_hosts(),
+            hosts=(self.mgr.cache.get_non_draining_hosts() if spec.service_name() == 'agent'
+                   else self.mgr.cache.get_schedulable_hosts()),
+            unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
             daemons=daemons,
             networks=self.mgr.cache.networks,
             filter_new_host=(
@@ -909,7 +910,7 @@ class CephadmServe:
             if self.mgr.use_agent:
                 # can only send ack to agents if we know for sure port they bound to
                 hosts_altered = set([h for h in hosts_altered if (h in self.mgr.cache.agent_ports and h in [
-                                    h2.hostname for h2 in self.mgr._non_draining_hosts()])])
+                                    h2.hostname for h2 in self.mgr.cache.get_non_draining_hosts()])])
                 self.mgr.agent_helpers._request_agent_acks(hosts_altered, increment=True)
 
         if r is None:
index 7b63fe7aa4e93486e4edaab47cad27a9a08e4d22..f5996eeae764b4faf5b8ccf99347fe93ff3c681c 100644 (file)
@@ -152,7 +152,7 @@ class OSDService(CephService):
     def prepare_drivegroup(self, drive_group: DriveGroupSpec) -> List[Tuple[str, DriveSelection]]:
         # 1) use fn_filter to determine matching_hosts
         matching_hosts = drive_group.placement.filter_matching_hostspecs(
-            self.mgr._schedulable_hosts())
+            self.mgr.cache.get_schedulable_hosts())
         # 2) Map the inventory to the InventoryHost object
         host_ds_map = []
 
@@ -261,7 +261,7 @@ class OSDService(CephService):
         if not osdspecs:
             self.mgr.log.debug("No OSDSpecs found")
             return []
-        return sum([spec.placement.filter_matching_hostspecs(self.mgr._schedulable_hosts()) for spec in osdspecs], [])
+        return sum([spec.placement.filter_matching_hostspecs(self.mgr.cache.get_schedulable_hosts()) for spec in osdspecs], [])
 
     def resolve_osdspecs_for_host(self, host: str,
                                   specs: Optional[List[DriveGroupSpec]] = None) -> List[DriveGroupSpec]:
@@ -271,7 +271,7 @@ class OSDService(CephService):
             specs = [cast(DriveGroupSpec, spec) for (sn, spec) in self.mgr.spec_store.spec_preview.items()
                      if spec.service_type == 'osd']
         for spec in specs:
-            if host in spec.placement.filter_matching_hostspecs(self.mgr._schedulable_hosts()):
+            if host in spec.placement.filter_matching_hostspecs(self.mgr.cache.get_schedulable_hosts()):
                 self.mgr.log.debug(f"Found OSDSpecs for host: <{host}> -> <{spec}>")
                 matching_specs.append(spec)
         return matching_specs
index 50628ca3f78b9fe819b39663beb94864dd5feeb0..b25411f92f4e227ef6ff58ca5dad6cc20bc6c284 100644 (file)
@@ -1127,11 +1127,12 @@ spec:
                         # being in offline/maint mode should disqualify hosts from being
                         # candidates for scheduling
                         candidates = [
-                            h.hostname for h in cephadm_module._schedulable_hosts()]
+                            h.hostname for h in cephadm_module.cache.get_schedulable_hosts()]
                         assert 'test2' in candidates
                         assert 'test3' in candidates
 
-                        unreachable = [h.hostname for h in cephadm_module._unreachable_hosts()]
+                        unreachable = [
+                            h.hostname for h in cephadm_module.cache.get_unreachable_hosts()]
                         assert 'test2' in unreachable
                         assert 'test3' in unreachable
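
All of the HostAssignment call sites touched above follow the same shape; the fragment below is a rough sketch of that recurring pattern, not a verbatim excerpt. It assumes HostAssignment is imported from cephadm's schedule module, and `mgr`, `spec` and `daemons` are placeholders already in scope.

# Sketch of the HostAssignment pattern seen throughout this diff; `mgr`,
# `spec` and `daemons` are placeholders, and the import path is an assumption.
from cephadm.schedule import HostAssignment

ha = HostAssignment(
    spec=spec,                                            # ServiceSpec being placed
    hosts=mgr.cache.get_schedulable_hosts(),              # candidate hosts for new daemons
    unreachable_hosts=mgr.cache.get_unreachable_hosts(),  # counted toward placement, never touched
    daemons=daemons,                                      # daemons already deployed for this service
    networks=mgr.cache.networks,
)
placements, to_add, to_remove = ha.place()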