git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: upgrade: pull image after upgrade start, and for each host
author: Sage Weil <sage@redhat.com>
Sat, 25 Jan 2020 22:09:36 +0000 (16:09 -0600)
committer: Sage Weil <sage@redhat.com>
Mon, 27 Jan 2020 17:30:32 +0000 (11:30 -0600)
Make 'upgrade start' return quickly, without first pulling the image.

Pull the image once to establish the image_id.

For each host, before updating the container, ensure the local image is
up to date, and if not, pull.

If a pull returns a different image_id, restart the upgrade process.  (This
could live-lock if two hosts have different container registries that
return different image IDs for the same image name. :/)

Signed-off-by: Sage Weil <sage@redhat.com>
src/pybind/mgr/cephadm/module.py

index c5d74910d1d3bad8d9c0ad9f571108ed5bf3be59..43e69fd9b0c3bcdae5f66c5af25c7c4959fa1b13 100644 (file)
@@ -419,7 +419,14 @@ class CephadmOrchestrator(MgrModule, orchestrator.OrchestratorClientMixin):
             return None
 
         target_name = self.upgrade_state.get('target_name')
-        target_id = self.upgrade_state.get('target_id')
+        target_id = self.upgrade_state.get('target_id', None)
+        if not target_id:
+            # need to learn the container hash
+            self.log.info('Upgrade: First pull of %s' % target_name)
+            target_id, target_version = self._get_container_image_id(target_name)
+            self.upgrade_state['target_id'] = target_id
+            self.upgrade_state['target_version'] = target_version
+            self._save_upgrade_state()
         target_version = self.upgrade_state.get('target_version')
         self.log.info('Upgrade: Target is %s with id %s' % (target_name,
                                                             target_id))
@@ -455,6 +462,29 @@ class CephadmOrchestrator(MgrModule, orchestrator.OrchestratorClientMixin):
                     need_upgrade_self = True
                     continue
 
+                # make sure host has latest container image
+                out, err, code = self._run_cephadm(
+                    d.nodename, None, 'inspect-image', [],
+                    image=target_name, no_fsid=True)
+                self.log.debug('out %s code %s' % (out, code))
+                if code or json.loads(''.join(out)).get('image_id') != target_id:
+                    self.log.info('Upgrade: Pulling %s on %s' % (target_name,
+                                                                 d.nodename))
+                    out, err, code = self._run_cephadm(
+                        d.nodename, None, 'pull', [],
+                        image=target_name, no_fsid=True)
+                    if code:
+                        self.log.warning('Upgrade: failed to pull %s on %s' % (
+                            target_name, d.nodename))
+                        # FIXME
+                        continue
+                    r = json.loads(''.join(out))
+                    if r.get('image_id') != target_id:
+                        self.log.info('Upgrade: image %s pull on %s got new image %s (not %s), restarting' % (target_name, d.nodename, r['image_id'], target_id))
+                        self.upgrade_state['image_id'] = r['image_id']
+                        self._save_upgrade_state()
+                        return None
+
                 if not self._wait_for_ok_to_stop(d):
                     return None
                 self.log.info('Upgrade: Redeploying %s.%s' %
@@ -1844,16 +1874,13 @@ class CephadmOrchestrator(MgrModule, orchestrator.OrchestratorClientMixin):
                                       self.upgrade_state.get('target_name'))
             return trivial_result('Upgrade to %s in progress' %
                                   self.upgrade_state.get('target_name'))
-        target_id, target_version = self._get_container_image_id(target_name)
         self.upgrade_state = {
             'target_name': target_name,
-            'target_id': target_id,
-            'target_version': target_version,
         }
         self._save_upgrade_state()
         self._clear_health_checks()
         self.event.set()
-        return trivial_result('Initiating upgrade to %s %s' % (image, target_id))
+        return trivial_result('Initiating upgrade to %s' % (image))
 
     def upgrade_pause(self):
         if not self.upgrade_state: