From: Adam King <adking@redhat.com>
Date: Wed, 3 Apr 2024 18:34:08 +0000 (-0400)
Subject: mgr/cephadm: handle setting required osd release with no OSDs during upgrade
X-Git-Tag: testing/wip-batrick-testing-20240411.154038~26^2
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=1033251fd3e021c92e0f407f63632b85073e4a36;p=ceph-ci.git

mgr/cephadm: handle setting required osd release with no OSDs during upgrade

A change to the `ceph osd require-osd-release` command made it
fail if no OSDs are up unless --yes-i-really-mean-it is passed.
For real clusters this is likely not an issue, but it can be an
annoyance when trying upgrades on test clusters that may not have
OSDs deployed. This patch retries the command with that flag when
the cluster has no OSD daemons at all, rather than failing the
upgrade. If OSD daemons exist but none of them are up, the upgrade
still fails, with an error asking the user to run the command
manually and then resume the upgrade.

Signed-off-by: Adam King <adking@redhat.com>
---

diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py
index de4b1a1902f..7a98a74b03d 100644
--- a/src/pybind/mgr/cephadm/upgrade.py
+++ b/src/pybind/mgr/cephadm/upgrade.py
@@ -13,6 +13,8 @@ from cephadm.utils import ceph_release_to_major, name_to_config_section, CEPH_UP
 from cephadm.ssh import HostConnectionError
 from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus, daemon_type_to_service
 
+from mgr_module import MonCommandFailed
+
 if TYPE_CHECKING:
     from .module import CephadmOrchestrator
 
@@ -980,10 +982,32 @@ class CephadmUpgrade:
         if osd_min < int(target_major):
             logger.info(
                 f'Upgrade: Setting require_osd_release to {target_major} {target_major_name}')
-            ret, _, err = self.mgr.check_mon_command({
-                'prefix': 'osd require-osd-release',
-                'release': target_major_name,
-            })
+            try:
+                ret, out, err = self.mgr.check_mon_command({
+                    'prefix': 'osd require-osd-release',
+                    'release': target_major_name,
+                })
+            except MonCommandFailed as e:
+                # recently it was changed so that `ceph osd require-osd-release`
+                # will fail if run on a cluster with no OSDs unless --yes-i-really-mean-it
+                # is passed. If we get that specific failure and we actually have no OSD
+                # daemons, we should just try to pass the flag
+                if "no OSDs are up" in str(e):
+                    if not self.mgr.cache.get_daemons_by_type('osd'):
+                        # this is the case where we actually have no OSDs in the cluster
+                        ret, _, err = self.mgr.check_mon_command({
+                            'prefix': 'osd require-osd-release',
+                            'release': target_major_name,
+                            'yes_i_really_mean_it': True
+                        })
+                    else:
+                        # this is the case where we do have OSDs listed, but none of them are up
+                        raise OrchestratorError(
+                            'All OSDs down, causing a failure setting the minimum required OSD release. '
+                            'If you are sure you\'d like to move forward, please run '
+                            '"ceph osd require-osd-release --yes-i-really-mean-it" then resume the upgrade')
+                else:
+                    raise
 
     def _complete_mds_upgrade(self) -> None:
         assert self.upgrade_state is not None