From: Adam King Date: Thu, 2 May 2024 17:35:41 +0000 (-0400) Subject: mgr/cephadm: make SMB and NVMEoF upgrade last in staggered upgrade X-Git-Tag: v19.1.1~187^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=d799ff70f42f55275ad1128414526318d5b80621;p=ceph.git mgr/cephadm: make SMB and NVMEoF upgrade last in staggered upgrade This needs to happen as some work on the NVMEoF side (still unmerged as of writing this) will make the NVMEoF daemon dependent on the mon. Prior to this patch, in a staggered upgrade, all daemons not using the ceph image were upgraded after the mgr since we typically only care about the default image changing or potential changes to how we handle our systemd units, which only need the mgr to be upgraded to be applied. This NVMEoF dependency on the mon changes this and we can no longer upgrade it directly after the mgr. This patch changes it so the NVMEoF daemon is instead upgraded after all ceph image daemons have been upgraded in a staggered upgrade scenario. Non-staggered upgrades are unaffected as the NVMEoF daemon was already upgraded near the end in that scenario. The SMB daemon has no reason it needs to be upgraded later, but it's in the (small) pool of daemons that don't use the ceph image and aren't for monitoring, so it's been affected by this as well. NOTE: This is a bit of an ugly patch imo and shows that a refactoring of the upgrade code is likely required. 
Hopefully this patch is more of a stopgap until that larger effort can be made Fixes: https://tracker.ceph.com/issues/65809 Signed-off-by: Adam King (cherry picked from commit 5e7a3c2147d87c1fc5be71acbadedefb70e024bf) --- diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index 7a98a74b03d9e..d8ffab2da5187 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -9,7 +9,7 @@ from cephadm.registry import Registry from cephadm.serve import CephadmServe from cephadm.services.cephadmservice import CephadmDaemonDeploySpec from cephadm.utils import ceph_release_to_major, name_to_config_section, CEPH_UPGRADE_ORDER, \ - CEPH_TYPES, NON_CEPH_IMAGE_TYPES, GATEWAY_TYPES + CEPH_TYPES, CEPH_IMAGE_TYPES, NON_CEPH_IMAGE_TYPES, MONITORING_STACK_TYPES, GATEWAY_TYPES from cephadm.ssh import HostConnectionError from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus, daemon_type_to_service @@ -1199,8 +1199,10 @@ class CephadmUpgrade: upgraded_daemon_count += done self._update_upgrade_progress(upgraded_daemon_count / len(daemons)) - # make sure mgr and non-ceph-image daemons are properly redeployed in staggered upgrade scenarios - if daemon_type == 'mgr' or daemon_type in NON_CEPH_IMAGE_TYPES: + # make sure mgr and monitoring stack daemons are properly redeployed in staggered upgrade scenarios + # The idea here is to upgrade the mointoring daemons after the mgr is done upgrading as + # that means cephadm and the dashboard modules themselves have been upgraded + if daemon_type == 'mgr' or daemon_type in MONITORING_STACK_TYPES: if any(d in target_digests for d in self.mgr.get_active_mgr_digests()): need_upgrade_names = [d[0].name() for d in need_upgrade] + \ [d[0].name() for d in need_upgrade_deployer] @@ -1214,6 +1216,20 @@ class CephadmUpgrade: else: # no point in trying to redeploy with new version if active mgr is not on the new version need_upgrade_deployer = [] + elif daemon_type in 
NON_CEPH_IMAGE_TYPES: + # Also handle daemons that are not on the ceph image but aren't monitoring daemons. + # This needs to be handled differently than the monitoring daemons as the nvmeof daemon, + # which falls in this category, relies on the mons being upgraded as well. This block + # sets these daemon types to be upgraded only when all ceph image daemons have been upgraded + if any(d in target_digests for d in self.mgr.get_active_mgr_digests()): + ceph_daemons = [d for d in self.mgr.cache.get_daemons() if d.daemon_type in CEPH_IMAGE_TYPES] + _, n1, n2, __ = self._detect_need_upgrade(ceph_daemons, target_digests, target_image) + if not n1 and not n2: + # no ceph daemons need upgrade + dds = [d for d in self.mgr.cache.get_daemons_by_type( + daemon_type) if d.name() not in need_upgrade_names] + _, ___, n2, ____ = self._detect_need_upgrade(dds, target_digests, target_image) + need_upgrade_deployer += n2 if any(d in target_digests for d in self.mgr.get_active_mgr_digests()): # only after the mgr itself is upgraded can we expect daemons to have