From c948fc5dce085a17bbf66a049ce8465e044c441c Mon Sep 17 00:00:00 2001 From: Adam King Date: Sun, 12 Feb 2023 13:53:14 -0500 Subject: [PATCH] mgr/cephadm: make upgrade respect use_repo_digest If the option is false, we should upgrade images based on whether their container image name matches, not whether the digest is the same or not. Fixes: https://tracker.ceph.com/issues/58698 Signed-off-by: Adam King (cherry picked from commit 78a73d146586e2f1f15d44c3b499f24299bc6662) --- src/pybind/mgr/cephadm/module.py | 10 +++++- src/pybind/mgr/cephadm/tests/test_upgrade.py | 1 + src/pybind/mgr/cephadm/upgrade.py | 37 ++++++++++++++------ src/pybind/mgr/orchestrator/_interface.py | 18 ++++++++++ 4 files changed, 54 insertions(+), 12 deletions(-) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 18b813abbea..6a32fb90549 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -3083,7 +3083,15 @@ Then run the following: } for host, dm in self.cache.daemons.items(): for name, dd in dm.items(): - if image_info.image_id == dd.container_image_id: + # check if the container digest for the digest we're checking upgrades for matches + # the container digests for the daemon if "use_repo_digest" setting is true + # or that the image name matches the daemon's image name if "use_repo_digest" + # is false. The idea is to generally check if the daemon is already using + # the image we're checking upgrade to. + if ( + (self.use_repo_digest and dd.matches_digests(image_info.repo_digests)) + or (not self.use_repo_digest and dd.matches_image_name(image)) + ): r['up_to_date'].append(dd.name()) elif dd.daemon_type in CEPH_IMAGE_TYPES: r['needs_update'][dd.name()] = { diff --git a/src/pybind/mgr/cephadm/tests/test_upgrade.py b/src/pybind/mgr/cephadm/tests/test_upgrade.py index 107395b93a1..393fc37aaff 100644 --- a/src/pybind/mgr/cephadm/tests/test_upgrade.py +++ b/src/pybind/mgr/cephadm/tests/test_upgrade.py @@ -136,6 +136,7 @@ def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator): style='cephadm', fsid='fsid', container_id='container_id', + container_image_name='to_image', container_image_id='image_id', container_image_digests=['to_image@repo_digest'], deployed_by=['to_image@repo_digest'], diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index 28c96fa6385..5f96a419ea0 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -438,7 +438,7 @@ class CephadmUpgrade: d for d in daemons if d.hostname is not None and d.hostname not in hosts] daemons = _get_earlier_daemons([_latest_type(dtypes)], other_hosts_daemons) err_msg_base += 'Daemons with types earlier in upgrade order than daemons on given host need upgrading.\n' - need_upgrade_self, n1, n2, _ = self._detect_need_upgrade(daemons, target_digests) + need_upgrade_self, n1, n2, _ = self._detect_need_upgrade(daemons, target_digests, target_name) if need_upgrade_self and ('mgr' not in dtypes or (daemon_types is None and services is None)): # also report active mgr as needing to be upgraded. It is not included in the resulting list # by default as it is treated special and handled via the need_upgrade_self bool @@ -730,7 +730,7 @@ class CephadmUpgrade: return True # if mds has no fs it should pass ok-to-stop - def _detect_need_upgrade(self, daemons: List[DaemonDescription], target_digests: Optional[List[str]] = None) -> Tuple[bool, List[Tuple[DaemonDescription, bool]], List[Tuple[DaemonDescription, bool]], int]: + def _detect_need_upgrade(self, daemons: List[DaemonDescription], target_digests: Optional[List[str]] = None, target_name: Optional[str] = None) -> Tuple[bool, List[Tuple[DaemonDescription, bool]], List[Tuple[DaemonDescription, bool]], int]: # this function takes a list of daemons and container digests. The purpose # is to go through each daemon and check if the current container digests # for that daemon match the target digests. The purpose being that we determine @@ -743,18 +743,33 @@ class CephadmUpgrade: done = 0 if target_digests is None: target_digests = [] + if target_name is None: + target_name = '' for d in daemons: assert d.daemon_type is not None assert d.daemon_id is not None assert d.hostname is not None if self.mgr.use_agent and not self.mgr.cache.host_metadata_up_to_date(d.hostname): continue - correct_digest = False - if (any(d in target_digests for d in (d.container_image_digests or [])) - or d.daemon_type in MONITORING_STACK_TYPES): - logger.debug('daemon %s.%s container digest correct' % ( + correct_image = False + # check if the container digest for the digest we're upgrading to matches + # the container digest for the daemon if "use_repo_digest" setting is true + # or that the image name matches the daemon's image name if "use_repo_digest" + # is false. The idea is to generally check if the daemon is already using + # the image we're upgrading to or not. Additionally, since monitoring stack + # daemons are included in the upgrade process but don't use the ceph images + # we are assuming any monitoring stack daemon is on the "correct" image already + if ( + (self.mgr.use_repo_digest and d.matches_digests(target_digests)) + or (not self.mgr.use_repo_digest and d.matches_image_name(target_name)) + or (d.daemon_type in MONITORING_STACK_TYPES) + ): + logger.debug('daemon %s.%s on correct image' % ( d.daemon_type, d.daemon_id)) - correct_digest = True + correct_image = True + # do deployed_by check using digest no matter what. We don't care + # what repo the image used to deploy the daemon was as long + # as the image content is correct if any(d in target_digests for d in (d.deployed_by or [])): logger.debug('daemon %s.%s deployed by correct version' % ( d.daemon_type, d.daemon_id)) @@ -767,7 +782,7 @@ class CephadmUpgrade: need_upgrade_self = True continue - if correct_digest: + if correct_image: logger.debug('daemon %s.%s not deployed by correct version' % ( d.daemon_type, d.daemon_id)) need_upgrade_deployer.append((d, True)) @@ -1160,7 +1175,7 @@ class CephadmUpgrade: daemons_of_type = [d for d in daemons if d.daemon_type == daemon_type] need_upgrade_self, need_upgrade, need_upgrade_deployer, done = self._detect_need_upgrade( - daemons_of_type, target_digests) + daemons_of_type, target_digests, target_image) upgraded_daemon_count += done self._update_upgrade_progress(upgraded_daemon_count / len(daemons)) @@ -1171,7 +1186,7 @@ class CephadmUpgrade: [d[0].name() for d in need_upgrade_deployer] dds = [d for d in self.mgr.cache.get_daemons_by_type( daemon_type) if d.name() not in need_upgrade_names] - need_upgrade_active, n1, n2, __ = self._detect_need_upgrade(dds, target_digests) + need_upgrade_active, n1, n2, __ = self._detect_need_upgrade(dds, target_digests, target_image) if not n1: if not need_upgrade_self and need_upgrade_active: need_upgrade_self = True @@ -1209,7 +1224,7 @@ class CephadmUpgrade: # types. If we haven't actually finished upgrading all the daemons # of this type, we should exit the loop here _, n1, n2, _ = self._detect_need_upgrade( - self.mgr.cache.get_daemons_by_type(daemon_type), target_digests) + self.mgr.cache.get_daemons_by_type(daemon_type), target_digests, target_image) if n1 or n2: continue diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index d08e76d8d79..bb9726744a6 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -1023,6 +1023,24 @@ class DaemonDescription(object): return (daemon_type_to_service(self.daemon_type) + '.' + self.daemon_id).startswith(service_name + '.') return False + def matches_digests(self, digests: Optional[List[str]]) -> bool: + # the DaemonDescription class maintains a list of container digests + # for the container image last reported as being used for the daemons. + # This function checks if any of those digests match any of the digests + # in the list of digests provided as an arg to this function + if not digests or not self.container_image_digests: + return False + return any(d in digests for d in self.container_image_digests) + + def matches_image_name(self, image_name: Optional[str]) -> bool: + # the DaemonDescription class has an attribute that tracks the image + # name of the container image last reported as being used by the daemon. + # This function compares if the image name provided as an arg matches + # the image name in said attribute + if not image_name or not self.container_image_name: + return False + return image_name == self.container_image_name + def service_id(self) -> str: assert self.daemon_id is not None assert self.daemon_type is not None -- 2.39.5