From: Sage Weil Date: Fri, 26 Feb 2021 16:53:54 +0000 (-0500) Subject: mgr/cephadm/upgrade: restart multiple osds at once X-Git-Tag: v17.1.0~2741^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b3d0420e60bcaede1d253bd551f201e756cc3d9a;p=ceph.git mgr/cephadm/upgrade: restart multiple osds at once Restart multiple osds in a single upgrade pass, when possible. Signed-off-by: Sage Weil --- diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index 189788cb28e4..ed4bbb6c9a62 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -225,7 +225,7 @@ class CephadmUpgrade: # setting force flag to retain old functionality. r = self.mgr.cephadm_services[daemon_type_to_service(s.daemon_type)].ok_to_stop([ - s.daemon_id], force=True) + s.daemon_id], known=known, force=True) if not r.retval: logger.info(f'Upgrade: {r.stdout}') @@ -453,11 +453,42 @@ class CephadmUpgrade: ): return + to_upgrade = [] + known_ok_to_stop: List[str] = [] for d in need_upgrade: assert d.daemon_type is not None assert d.daemon_id is not None assert d.hostname is not None + if not d.container_image_id: + if d.container_image_name == target_image: + logger.debug( + 'daemon %s has unknown container_image_id but has correct image name' % (d.name())) + continue + + if known_ok_to_stop: + if d.name() in known_ok_to_stop: + logger.info(f'Upgrade: {d.name()} is also safe to restart') + to_upgrade.append(d) + continue + + if not self._wait_for_ok_to_stop(d, known_ok_to_stop): + return + + to_upgrade.append(d) + + # if we don't have a list of others to consider, stop now + if not known_ok_to_stop: + break + + num = 1 + for d in to_upgrade: + assert d.daemon_type is not None + assert d.daemon_id is not None + assert d.hostname is not None + + self._update_upgrade_progress(done / len(daemons)) + # make sure host has latest container image out, errs, code = CephadmServe(self.mgr)._run_cephadm( d.hostname, '', 'inspect-image', [], @@ -486,17 +517,12 @@ class CephadmUpgrade: self._save_upgrade_state() return - self._update_upgrade_progress(done / len(daemons)) - - if not d.container_image_id: - if d.container_image_name == target_image: - logger.debug( - 'daemon %s has unknown container_image_id but has correct image name' % (d.name())) - continue - if not self._wait_for_ok_to_stop(d): - return - logger.info('Upgrade: Updating %s.%s' % - (d.daemon_type, d.daemon_id)) + if len(to_upgrade) > 1: + logger.info('Upgrade: Updating %s.%s (%d/%d)' % + (d.daemon_type, d.daemon_id, num, len(to_upgrade))) + else: + logger.info('Upgrade: Updating %s.%s' % + (d.daemon_type, d.daemon_id)) try: self.mgr._daemon_action( d.daemon_type, @@ -514,6 +540,9 @@ class CephadmUpgrade: f'Upgrade daemon: {d.name()}: {e}' ], }) + return + num += 1 + if to_upgrade: return # complete mon upgrade?