git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: redeploy daemons deployed using old image during upgrade 39435/head
author: Adam King <adking@redhat.com>
Thu, 11 Feb 2021 16:43:01 +0000 (11:43 -0500)
committer: Adam King <adking@redhat.com>
Thu, 18 Mar 2021 22:23:16 +0000 (18:23 -0400)
Add an extra check during upgrade that daemons were deployed by the mgr
using the new image. This makes sure the unit.run files for all daemons
are updated if they changed between the old and new images.

Fixes: https://tracker.ceph.com/issues/49013
Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/tests/test_upgrade.py
src/pybind/mgr/cephadm/upgrade.py
src/pybind/mgr/cephadm/utils.py
src/pybind/mgr/orchestrator/_interface.py

index 88b5c939a27c76b7cdc8b8eccfeb90d94dfe360c..7304ec41c36fbaf1749d8f501a79995cb5059c6a 100644 (file)
@@ -83,6 +83,7 @@ def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator):
                             container_id='container_id',
                             container_image_id='image_id',
                             container_image_digests=['to_image@repo_digest'],
+                            deployed_by=['to_image@repo_digest'],
                             version='version',
                             state='running',
                         )
index cb47af306747a80b6c25ed63d7a5e8c8a4434e26..92067a48ec681b38959c0a2ba244c04f37da5466 100644 (file)
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Optional, Dict, List, Tuple
 import orchestrator
 from cephadm.serve import CephadmServe
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
-from cephadm.utils import ceph_release_to_major, name_to_config_section, CEPH_UPGRADE_ORDER
+from cephadm.utils import ceph_release_to_major, name_to_config_section, CEPH_UPGRADE_ORDER, MONITORING_STACK_TYPES
 from orchestrator import OrchestratorError, DaemonDescription, daemon_type_to_service
 
 if TYPE_CHECKING:
@@ -452,18 +452,29 @@ class CephadmUpgrade:
             logger.info('Upgrade: Checking %s daemons' % daemon_type)
 
             need_upgrade_self = False
-            need_upgrade = []
+            need_upgrade: List[Tuple[DaemonDescription, bool]] = []
             for d in daemons:
                 if d.daemon_type != daemon_type:
                     continue
-                if any(d in target_digests for d in (d.container_image_digests or [])):
+                correct_digest = False
+                if (any(d in target_digests for d in (d.container_image_digests or []))
+                        or d.daemon_type in MONITORING_STACK_TYPES):
                     logger.debug('daemon %s.%s container digest correct' % (
                         daemon_type, d.daemon_id))
-                    done += 1
-                    continue
-                logger.debug('daemon %s.%s not correct (%s, %s, %s)' % (
-                    daemon_type, d.daemon_id,
-                    d.container_image_name, d.container_image_digests, d.version))
+                    correct_digest = True
+                    if any(d in target_digests for d in (d.deployed_by or [])):
+                        logger.debug('daemon %s.%s deployed by correct version' % (
+                            d.daemon_type, d.daemon_id))
+                        done += 1
+                        continue
+
+                if correct_digest:
+                    logger.debug('daemon %s.%s not deployed by correct version' % (
+                        d.daemon_type, d.daemon_id))
+                else:
+                    logger.debug('daemon %s.%s not correct (%s, %s, %s)' % (
+                        daemon_type, d.daemon_id,
+                        d.container_image_name, d.container_image_digests, d.version))
 
                 assert d.daemon_type is not None
                 assert d.daemon_id is not None
@@ -474,22 +485,23 @@ class CephadmUpgrade:
                     need_upgrade_self = True
                     continue
 
-                need_upgrade.append(d)
+                need_upgrade.append((d, correct_digest))
 
             # prepare filesystems for daemon upgrades?
             if (
                 daemon_type == 'mds'
                 and need_upgrade
-                and not self._prepare_for_mds_upgrade(target_major, need_upgrade)
+                and not self._prepare_for_mds_upgrade(target_major, [d_entry[0] for d_entry in need_upgrade])
             ):
                 return
 
             if need_upgrade:
                 self.upgrade_info_str = 'Currently upgrading %s daemons' % (daemon_type)
 
-            to_upgrade = []
+            to_upgrade: List[Tuple[DaemonDescription, bool]] = []
             known_ok_to_stop: List[str] = []
-            for d in need_upgrade:
+            for d_entry in need_upgrade:
+                d = d_entry[0]
                 assert d.daemon_type is not None
                 assert d.daemon_id is not None
                 assert d.hostname is not None
@@ -503,7 +515,7 @@ class CephadmUpgrade:
                 if known_ok_to_stop:
                     if d.name() in known_ok_to_stop:
                         logger.info(f'Upgrade: {d.name()} is also safe to restart')
-                        to_upgrade.append(d)
+                        to_upgrade.append(d_entry)
                     continue
 
                 if d.daemon_type in ['mon', 'osd', 'mds']:
@@ -512,14 +524,15 @@ class CephadmUpgrade:
                     if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
                         return
 
-                to_upgrade.append(d)
+                to_upgrade.append(d_entry)
 
                 # if we don't have a list of others to consider, stop now
                 if not known_ok_to_stop:
                     break
 
             num = 1
-            for d in to_upgrade:
+            for d_entry in to_upgrade:
+                d = d_entry[0]
                 assert d.daemon_type is not None
                 assert d.daemon_id is not None
                 assert d.hostname is not None
@@ -566,17 +579,18 @@ class CephadmUpgrade:
                 else:
                     logger.info('Upgrade: Updating %s.%s' %
                                 (d.daemon_type, d.daemon_id))
+                action = 'Upgrading' if not d_entry[1] else 'Redeploying'
                 try:
                     daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(d)
                     self.mgr._daemon_action(
                         daemon_spec,
                         'redeploy',
-                        image=target_image
+                        image=target_image if not d_entry[1] else None
                     )
                 except Exception as e:
                     self._fail_upgrade('UPGRADE_REDEPLOY_DAEMON', {
                         'severity': 'warning',
-                        'summary': f'Upgrading daemon {d.name()} on host {d.hostname} failed.',
+                        'summary': f'{action} daemon {d.name()} on host {d.hostname} failed.',
                         'count': 1,
                         'detail': [
                             f'Upgrade daemon: {d.name()}: {e}'
index bde3d1f4e261d5e6fc1ef238ba84eb5fd2a46c56..0ca4ad644c1d4acfa9c85aeca548717311bbb515 100644 (file)
@@ -23,7 +23,8 @@ class CephadmNoImage(Enum):
 # NOTE: order important here as these are used for upgrade order
 CEPH_TYPES = ['mgr', 'mon', 'crash', 'osd', 'mds', 'rgw', 'rbd-mirror', 'cephfs-mirror']
 GATEWAY_TYPES = ['iscsi', 'nfs']
-CEPH_UPGRADE_ORDER = CEPH_TYPES + GATEWAY_TYPES
+MONITORING_STACK_TYPES = ['node-exporter', 'prometheus', 'alertmanager', 'grafana']
+CEPH_UPGRADE_ORDER = CEPH_TYPES + GATEWAY_TYPES + MONITORING_STACK_TYPES
 
 
 # Used for _run_cephadm used for check-host etc that don't require an --image parameter
index dec6b0ad5d9a6a02b96e7914b3e203eb23889b70..5fd38f856a71c949bd4717d242f8ab100328068c 100644 (file)
@@ -848,7 +848,7 @@ class DaemonDescription(object):
 
         self.ports: Optional[List[int]] = ports
         self.ip: Optional[str] = ip
-        
+
         self.deployed_by = deployed_by
 
         self.is_active = is_active