From: Adam King Date: Thu, 11 Feb 2021 16:43:01 +0000 (-0500) Subject: mgr/cephadm: redeploy daemons deployed using old image during upgrade X-Git-Tag: v17.1.0~2516^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=83f0ca9ea02d51d3311ae60d6805fea7c2546b3d;p=ceph.git mgr/cephadm: redeploy daemons deployed using old image during upgrade Add extra check that daemons were deployed by mgr using new image during upgrade. Makes sure unit.run file for all daemons are updated if they changed between old and new images. Fixes: https://tracker.ceph.com/issues/49013 Signed-off-by: Adam King --- diff --git a/src/pybind/mgr/cephadm/tests/test_upgrade.py b/src/pybind/mgr/cephadm/tests/test_upgrade.py index 88b5c939a27c7..7304ec41c36fb 100644 --- a/src/pybind/mgr/cephadm/tests/test_upgrade.py +++ b/src/pybind/mgr/cephadm/tests/test_upgrade.py @@ -83,6 +83,7 @@ def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator): container_id='container_id', container_image_id='image_id', container_image_digests=['to_image@repo_digest'], + deployed_by=['to_image@repo_digest'], version='version', state='running', ) diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index cb47af306747a..92067a48ec681 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Optional, Dict, List, Tuple import orchestrator from cephadm.serve import CephadmServe from cephadm.services.cephadmservice import CephadmDaemonDeploySpec -from cephadm.utils import ceph_release_to_major, name_to_config_section, CEPH_UPGRADE_ORDER +from cephadm.utils import ceph_release_to_major, name_to_config_section, CEPH_UPGRADE_ORDER, MONITORING_STACK_TYPES from orchestrator import OrchestratorError, DaemonDescription, daemon_type_to_service if TYPE_CHECKING: @@ -452,18 +452,29 @@ class CephadmUpgrade: logger.info('Upgrade: Checking %s daemons' % daemon_type) need_upgrade_self = False - need_upgrade = [] + need_upgrade: List[Tuple[DaemonDescription, bool]] = [] for d in daemons: if d.daemon_type != daemon_type: continue - if any(d in target_digests for d in (d.container_image_digests or [])): + correct_digest = False + if (any(d in target_digests for d in (d.container_image_digests or [])) + or d.daemon_type in MONITORING_STACK_TYPES): logger.debug('daemon %s.%s container digest correct' % ( daemon_type, d.daemon_id)) - done += 1 - continue - logger.debug('daemon %s.%s not correct (%s, %s, %s)' % ( - daemon_type, d.daemon_id, - d.container_image_name, d.container_image_digests, d.version)) + correct_digest = True + if any(d in target_digests for d in (d.deployed_by or [])): + logger.debug('daemon %s.%s deployed by correct version' % ( + d.daemon_type, d.daemon_id)) + done += 1 + continue + + if correct_digest: + logger.debug('daemon %s.%s not deployed by correct version' % ( + d.daemon_type, d.daemon_id)) + else: + logger.debug('daemon %s.%s not correct (%s, %s, %s)' % ( + daemon_type, d.daemon_id, + d.container_image_name, d.container_image_digests, d.version)) assert d.daemon_type is not None assert d.daemon_id is not None @@ -474,22 +485,23 @@ class CephadmUpgrade: need_upgrade_self = True continue - need_upgrade.append(d) + need_upgrade.append((d, correct_digest)) # prepare filesystems for daemon upgrades? if ( daemon_type == 'mds' and need_upgrade - and not self._prepare_for_mds_upgrade(target_major, need_upgrade) + and not self._prepare_for_mds_upgrade(target_major, [d_entry[0] for d_entry in need_upgrade]) ): return if need_upgrade: self.upgrade_info_str = 'Currently upgrading %s daemons' % (daemon_type) - to_upgrade = [] + to_upgrade: List[Tuple[DaemonDescription, bool]] = [] known_ok_to_stop: List[str] = [] - for d in need_upgrade: + for d_entry in need_upgrade: + d = d_entry[0] assert d.daemon_type is not None assert d.daemon_id is not None assert d.hostname is not None @@ -503,7 +515,7 @@ class CephadmUpgrade: if known_ok_to_stop: if d.name() in known_ok_to_stop: logger.info(f'Upgrade: {d.name()} is also safe to restart') - to_upgrade.append(d) + to_upgrade.append(d_entry) continue if d.daemon_type in ['mon', 'osd', 'mds']: @@ -512,14 +524,15 @@ class CephadmUpgrade: if not self._wait_for_ok_to_stop(d, known_ok_to_stop): return - to_upgrade.append(d) + to_upgrade.append(d_entry) # if we don't have a list of others to consider, stop now if not known_ok_to_stop: break num = 1 - for d in to_upgrade: + for d_entry in to_upgrade: + d = d_entry[0] assert d.daemon_type is not None assert d.daemon_id is not None assert d.hostname is not None @@ -566,17 +579,18 @@ class CephadmUpgrade: else: logger.info('Upgrade: Updating %s.%s' % (d.daemon_type, d.daemon_id)) + action = 'Upgrading' if not d_entry[1] else 'Redeploying' try: daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(d) self.mgr._daemon_action( daemon_spec, 'redeploy', - image=target_image + image=target_image if not d_entry[1] else None ) except Exception as e: self._fail_upgrade('UPGRADE_REDEPLOY_DAEMON', { 'severity': 'warning', - 'summary': f'Upgrading daemon {d.name()} on host {d.hostname} failed.', + 'summary': f'{action} daemon {d.name()} on host {d.hostname} failed.', 'count': 1, 'detail': [ f'Upgrade daemon: {d.name()}: {e}' diff --git a/src/pybind/mgr/cephadm/utils.py b/src/pybind/mgr/cephadm/utils.py index bde3d1f4e261d..0ca4ad644c1d4 100644 --- a/src/pybind/mgr/cephadm/utils.py +++ b/src/pybind/mgr/cephadm/utils.py @@ -23,7 +23,8 @@ class CephadmNoImage(Enum): # NOTE: order important here as these are used for upgrade order CEPH_TYPES = ['mgr', 'mon', 'crash', 'osd', 'mds', 'rgw', 'rbd-mirror', 'cephfs-mirror'] GATEWAY_TYPES = ['iscsi', 'nfs'] -CEPH_UPGRADE_ORDER = CEPH_TYPES + GATEWAY_TYPES +MONITORING_STACK_TYPES = ['node-exporter', 'prometheus', 'alertmanager', 'grafana'] +CEPH_UPGRADE_ORDER = CEPH_TYPES + GATEWAY_TYPES + MONITORING_STACK_TYPES # Used for _run_cephadm used for check-host etc that don't require an --image parameter diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index dec6b0ad5d9a6..5fd38f856a71c 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -848,7 +848,7 @@ class DaemonDescription(object): self.ports: Optional[List[int]] = ports self.ip: Optional[str] = ip - + self.deployed_by = deployed_by self.is_active = is_active