mgr/cephadm: speed up upgrade when using agent

author Adam King <adking@redhat.com>
Thu, 2 Dec 2021 13:27:10 +0000 (08:27 -0500)
committer Adam King <adking@redhat.com>
Wed, 15 Dec 2021 16:43:16 +0000 (11:43 -0500)
diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py

index c1609e042be4668b05214742e554622c66b3c163..e537e5978b7397ff884c6b65acfe1a8a42b95bb7 100644 (file)
--- a/src/pybind/mgr/cephadm/agent.py
+++ b/src/pybind/mgr/cephadm/agent.py
@@ -356,6 +356,12 @@ class CephadmAgentHelpers:
                  host, self.mgr.cache.agent_ports[host], {'counter': self.mgr.cache.agent_counter[host]}, self.mgr)
              message_thread.start()
  
+    def _request_ack_all_not_up_to_date(self) -> None:
+        self.mgr.agent_helpers._request_agent_acks(
+            set([h for h in self.mgr.cache.get_hosts() if
+                 (not self.mgr.cache.host_metadata_up_to_date(h)
+                 and h in self.mgr.cache.agent_ports and not self.mgr.cache.messaging_agent(h))]))
+
      def _agent_down(self, host: str) -> bool:
          # if host is draining or drained (has _no_schedule label) there should not
          # be an agent deployed there and therefore we should return False
diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py

index 487d9c84d67654b39d4b4802161034a595e03edd..a075d8bfc5abf47bb2d60af886681f1ad50a23f1 100644 (file)
--- a/src/pybind/mgr/cephadm/upgrade.py
+++ b/src/pybind/mgr/cephadm/upgrade.py
@@ -521,13 +521,6 @@ class CephadmUpgrade:
          target_digests = self.upgrade_state.target_digests
          target_version = self.upgrade_state.target_version
  
-        if self.mgr.use_agent and not self.mgr.cache.all_host_metadata_up_to_date():
-            # need to wait for metadata to come in
-            self.mgr.agent_helpers._request_agent_acks(
-                set([h for h in self.mgr.cache.get_hosts() if
-                     (not self.mgr.cache.host_metadata_up_to_date(h) and h in self.mgr.cache.agent_ports and not self.mgr.cache.messaging_agent(h))]))
-            return
-
          first = False
          if not target_id or not target_version or not target_digests:
              # need to learn the container hash
@@ -612,6 +605,9 @@ class CephadmUpgrade:
                      continue
                  assert d.daemon_type is not None
                  assert d.daemon_id is not None
+                assert d.hostname is not None
+                if self.mgr.use_agent and not self.mgr.cache.host_metadata_up_to_date(d.hostname):
+                    continue
                  correct_digest = False
                  if (any(d in target_digests for d in (d.container_image_digests or []))
                          or d.daemon_type in MONITORING_STACK_TYPES):
@@ -827,8 +823,6 @@ class CephadmUpgrade:
                          'who': section,
                      })
  
-            logger.debug('Upgrade: All %s daemons are up to date.' % daemon_type)
-
              # complete osd upgrade?
              if daemon_type == 'osd':
                  osdmap = self.mgr.get("osd_map")
@@ -881,6 +875,13 @@ class CephadmUpgrade:
                      self.upgrade_state.fs_original_allow_standby_replay = {}
                      self._save_upgrade_state()
  
+            # Make sure all metadata is up to date before saying we are done upgrading this daemon type
+            if self.mgr.use_agent and not self.mgr.cache.all_host_metadata_up_to_date():
+                self.mgr.agent_helpers._request_ack_all_not_up_to_date()
+                return
+
+            logger.debug('Upgrade: All %s daemons are up to date.' % daemon_type)
+
          # clean up
          logger.info('Upgrade: Finalizing container_image settings')
          self.mgr.set_container_image('global', target_image)
author	Adam King <adking@redhat.com>
	Thu, 2 Dec 2021 13:27:10 +0000 (08:27 -0500)
committer	Adam King <adking@redhat.com>
	Wed, 15 Dec 2021 16:43:16 +0000 (11:43 -0500)
src/pybind/mgr/cephadm/agent.py		patch \| blob \| history
src/pybind/mgr/cephadm/upgrade.py		patch \| blob \| history