]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: handle setting required osd release with no OSDs during upgrade 56705/head
authorAdam King <adking@redhat.com>
Wed, 3 Apr 2024 18:34:08 +0000 (14:34 -0400)
committerAdam King <adking@redhat.com>
Thu, 4 Apr 2024 15:38:18 +0000 (11:38 -0400)
A change to the `ceph osd require-osd-release` command made it
fail if no OSDs are up unless --yes-i-really-mean-it is passed.
For real clusters this is likely not an issue, but it can be an
annoyance when trying upgrades on test clusters that may not have
OSDs deployed. This patch passes the flag in cases where we have
no OSDs rather than failing the upgrade.

Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/upgrade.py

index de4b1a1902fe6bc317432fe71723ac46f61adb25..7a98a74b03d9eb59b6f54c0f5aa147e623286d54 100644 (file)
@@ -13,6 +13,8 @@ from cephadm.utils import ceph_release_to_major, name_to_config_section, CEPH_UP
 from cephadm.ssh import HostConnectionError
 from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus, daemon_type_to_service
 
+from mgr_module import MonCommandFailed
+
 if TYPE_CHECKING:
     from .module import CephadmOrchestrator
 
@@ -980,10 +982,32 @@ class CephadmUpgrade:
         if osd_min < int(target_major):
             logger.info(
                 f'Upgrade: Setting require_osd_release to {target_major} {target_major_name}')
-            ret, _, err = self.mgr.check_mon_command({
-                'prefix': 'osd require-osd-release',
-                'release': target_major_name,
-            })
+            try:
+                ret, out, err = self.mgr.check_mon_command({
+                    'prefix': 'osd require-osd-release',
+                    'release': target_major_name,
+                })
+            except MonCommandFailed as e:
+                # recently it was changed so that `ceph osd require-osd-release`
+                # will fail if run on a cluster with no OSDs unless --yes-i-really-mean-it
+                # is passed. If we get that specific failure and we actually have no OSD
+                # daemons, we should just try to pass the flag
+                if "no OSDs are up" in str(e):
+                    if not self.mgr.cache.get_daemons_by_type('osd'):
+                        # this is the case where we actually have no OSDs in the cluster
+                        ret, _, err = self.mgr.check_mon_command({
+                            'prefix': 'osd require-osd-release',
+                            'release': target_major_name,
+                            'yes_i_really_mean_it': True
+                        })
+                    else:
+                        # this is the case where we do have OSDs listed, but none of them are up
+                        raise OrchestratorError(
+                            'All OSDs down, causing a failure setting the minimum required OSD release. '
+                            'If you are sure you\'d like to move forward, please run '
+                            '"ceph osd require-osd-release --yes-i-really-mean-it" then resume the upgrade')
+                else:
+                    raise
 
     def _complete_mds_upgrade(self) -> None:
         assert self.upgrade_state is not None