From: Sage Weil Date: Sat, 25 Jan 2020 22:09:36 +0000 (-0600) Subject: mgr/cephadm: upgrade: pull image after upgrade start, and for each host X-Git-Tag: v15.1.1~604^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=fedcfc0a2d246a153f85f4d96e4f58a80674e82a;p=ceph.git mgr/cephadm: upgrade: pull image after upgrade start, and for each host Make 'upgrade start' return quickly, without first pulling the image. Pull the image once to establish the image_id. For each host, before updating the container, ensure the local image is up to date, and if not, pull. If a pull returns a different image_id, restart upgrade process. (This could live-lock if two hosts have different container registries that return different image ids for the same image name. :/) Signed-off-by: Sage Weil --- diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index c5d74910d1d3..43e69fd9b0c3 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -419,7 +419,14 @@ class CephadmOrchestrator(MgrModule, orchestrator.OrchestratorClientMixin): return None target_name = self.upgrade_state.get('target_name') - target_id = self.upgrade_state.get('target_id') + target_id = self.upgrade_state.get('target_id', None) + if not target_id: + # need to learn the container hash + self.log.info('Upgrade: First pull of %s' % target_name) + target_id, target_version = self._get_container_image_id(target_name) + self.upgrade_state['target_id'] = target_id + self.upgrade_state['target_version'] = target_version + self._save_upgrade_state() target_version = self.upgrade_state.get('target_version') self.log.info('Upgrade: Target is %s with id %s' % (target_name, target_id)) @@ -455,6 +462,29 @@ class CephadmOrchestrator(MgrModule, orchestrator.OrchestratorClientMixin): need_upgrade_self = True continue + # make sure host has latest container image + out, err, code = self._run_cephadm( + d.nodename, None, 'inspect-image', [], + image=target_name, no_fsid=True) + self.log.debug('out %s code %s' % (out, code)) + if code or json.loads(''.join(out)).get('image_id') != target_id: + self.log.info('Upgrade: Pulling %s on %s' % (target_name, + d.nodename)) + out, err, code = self._run_cephadm( + d.nodename, None, 'pull', [], + image=target_name, no_fsid=True) + if code: + self.log.warning('Upgrade: failed to pull %s on %s' % ( + target_name, d.nodename)) + # FIXME + continue + r = json.loads(''.join(out)) + if r.get('image_id') != target_id: + self.log.info('Upgrade: image %s pull on %s got new image %s (not %s), restarting' % (target_name, d.nodename, r['image_id'], target_id)) + self.upgrade_state['image_id'] = r['image_id'] + self._save_upgrade_state() + return None + if not self._wait_for_ok_to_stop(d): return None self.log.info('Upgrade: Redeploying %s.%s' % @@ -1844,16 +1874,13 @@ class CephadmOrchestrator(MgrModule, orchestrator.OrchestratorClientMixin): self.upgrade_state.get('target_name')) return trivial_result('Upgrade to %s in progress' % self.upgrade_state.get('target_name')) - target_id, target_version = self._get_container_image_id(target_name) self.upgrade_state = { 'target_name': target_name, - 'target_id': target_id, - 'target_version': target_version, } self._save_upgrade_state() self._clear_health_checks() self.event.set() - return trivial_result('Initiating upgrade to %s %s' % (image, target_id)) + return trivial_result('Initiating upgrade to %s' % (image)) def upgrade_pause(self): if not self.upgrade_state: