From: Ashwin M. Joshi Date: Fri, 8 May 2026 07:05:53 +0000 (+0530) Subject: mgr: Accept only osd daemon type for bucket params to adhere to upgrade sequence X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F68398%2Fhead;p=ceph.git mgr: Accept only osd daemon type for bucket params to adhere to upgrade sequence Fixes: https://tracker.ceph.com/issues/75603 Signed-off-by: Ashwin M. Joshi --- diff --git a/doc/cephadm/upgrade.rst b/doc/cephadm/upgrade.rst index 1ac579b6d42c..823f1623dbea 100644 --- a/doc/cephadm/upgrade.rst +++ b/doc/cephadm/upgrade.rst @@ -112,16 +112,15 @@ This process uses the ceph ``osd ok-to-upgrade`` command. Requirements: -* For OSD-only upgrades, pass both ``--crush_bucket_type`` and ``--crush_bucket_name``. - Supported types today are ``host``, ``rack``, and ``chassis``. +* For OSD-only upgrades, pass both ``--crush_bucket_type`` and ``--crush_bucket_name`` + and ``--daemon-types osd`` only. Supported types today are ``host``, ``rack``, + and ``chassis``. * The Monitor's ``osd ok-to-upgrade`` expects the target **short** Ceph version (same shape as ``ceph_version_short`` in ``ceph osd metadata``). * If the Monitors indicate to cephadm that no OSDs in the selected CRUSH bucket are okay to upgrade, cephadm will log details and then retry the operation. - If the bucket parameters for a ceph ``osd ok-to-upgrade`` upgrade are not provided, +* If the bucket parameters for a ceph ``osd ok-to-upgrade`` upgrade are not provided, cephadm will fall back to the default ceph osd ok-to-stop gate for OSD upgrades. -* If bucket parameters are not provided, cephadm will fall back to ``osd ok-to-stop`` - for OSD upgrades. * Bucket-scope upgrades apply only to OSDs. CRUSH buckets do not influence upgrades of other daemon types, for example Monitors, Managers, and MDSes. diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index aff629fcde1e..0b005c20a5c4 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -4197,6 +4197,10 @@ Then run the following: raise OrchestratorError( '--hosts cannot be combined with --crush_bucket_type or --crush_bucket_name') + if services is not None and (bucket_type is not None or bucket_name is not None): + raise OrchestratorError( + '--services cannot be combined with --crush_bucket_type or --crush_bucket_name') + if limit is not None: if limit < 1: raise OrchestratorError( diff --git a/src/pybind/mgr/cephadm/tests/test_upgrade.py b/src/pybind/mgr/cephadm/tests/test_upgrade.py index c008abdf980c..5529742f893f 100644 --- a/src/pybind/mgr/cephadm/tests/test_upgrade.py +++ b/src/pybind/mgr/cephadm/tests/test_upgrade.py @@ -59,6 +59,23 @@ def test_upgrade_start_hosts_mutually_exclusive_with_bucket(cephadm_module: Ceph assert str(err.value) == '--hosts cannot be combined with --crush_bucket_type or --crush_bucket_name' +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) +def test_upgrade_start_services_mutually_exclusive_with_bucket(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_host(cephadm_module, 'test2'): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), status_running=True): + with pytest.raises(OrchestratorError) as err: + cephadm_module.upgrade_start( + 'image_id', None, + services=['mgr'], + bucket_type='rack', + bucket_name='rack-a', + ) + assert str(err.value) == ( + '--services cannot be combined with --crush_bucket_type or ' + '--crush_bucket_name') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) def test_upgrade_start_offline_hosts(cephadm_module: CephadmOrchestrator): with with_host(cephadm_module, 'test'): @@ -283,8 +300,7 @@ def test_upgrade_status_which_full_cluster_with_crush_bucket(cephadm_module: Cep ) with mock.patch.object(cephadm_module.upgrade, '_get_upgrade_info', return_value=('0/0', [])): status = wait(cephadm_module, cephadm_module.upgrade_status()) - assert status.which == ( - 'Upgrading all daemon types on all hosts (OSDs only in bucket scope)') + assert status.which == 'Upgrading all daemon types on all hosts' def test_parse_ok_to_upgrade_mon_json_nested_and_flat(): @@ -522,13 +538,16 @@ def test_validate_failure_domain_upgrade_options_multi_token_name(cephadm_module def test_validate_failure_domain_upgrade_options_daemon_types(cephadm_module: CephadmOrchestrator): - cephadm_module.upgrade._validate_failure_domain_upgrade_options( - 'rack', 'rack-a', None) - cephadm_module.upgrade._validate_failure_domain_upgrade_options( - 'rack', 'rack-a', ['osd', 'mds']) - cephadm_module.upgrade._validate_failure_domain_upgrade_options( - 'rack', 'rack-a', ['mgr', 'mon', 'osd']) - osd_msg = 'Bucket parameters for OSD upgrade require --daemon-types to include "osd"' + osd_msg = 'Bucket parameters for OSD upgrade require --daemon-types to be "osd"' + with pytest.raises(OrchestratorError): + cephadm_module.upgrade._validate_failure_domain_upgrade_options( + 'rack', 'rack-a', None) + with pytest.raises(OrchestratorError): + cephadm_module.upgrade._validate_failure_domain_upgrade_options( + 'rack', 'rack-a', ['osd', 'mds']) + with pytest.raises(OrchestratorError): + cephadm_module.upgrade._validate_failure_domain_upgrade_options( + 'rack', 'rack-a', ['mgr', 'mon', 'osd']) with pytest.raises(OrchestratorError) as err: cephadm_module.upgrade._validate_failure_domain_upgrade_options( 'rack', 'rack-a', ['mgr', 'mon']) diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index f2dbc1957572..f4e3188c812b 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -300,7 +300,7 @@ class CephadmUpgrade: def _upgrade_status_osd_bucket_scope_active(self) -> bool: """True when upgrade state selects OSD bucket scope""" st = self.upgrade_state - return bool(st and st.crush_bucket_name) + return bool(st and st.crush_bucket_name and st.crush_bucket_type) def upgrade_status(self) -> orchestrator.UpgradeStatusSpec: r = orchestrator.UpgradeStatusSpec() @@ -325,8 +325,6 @@ class CephadmUpgrade: which_str += f' on host(s) {",".join(self.upgrade_state.hosts)}' elif self.upgrade_state.hosts is not None: which_str = f'Upgrading all daemons on host(s) {",".join(self.upgrade_state.hosts)}' - elif osd_bucket_scope: - which_str = 'Upgrading all daemon types on all hosts (OSDs only in bucket scope)' else: which_str = 'Upgrading all daemon types on all hosts' if self.upgrade_state.total_count is not None and self.upgrade_state.remaining_count is not None: @@ -489,17 +487,6 @@ class CephadmUpgrade: raise OrchestratorError( f'Supported bucket types for OSD upgrade are: {allowed} (specified: {crush_bucket_type!r})') - osd_in_upgrade_scope = ( - daemon_types is None - or any( - (dt or '').strip().lower() == 'osd' - for dt in daemon_types - ) - ) - if not osd_in_upgrade_scope: - raise OrchestratorError( - 'Bucket parameters for OSD upgrade require --daemon-types to include "osd"') - single_bucket_name = ( ',' not in bucket_name and len(bucket_name.split()) == 1 @@ -509,6 +496,15 @@ class CephadmUpgrade: raise OrchestratorError( 'Invalid --crush_bucket_name: use a single name token without commas') + osd_only = ( + daemon_types is not None + and len(daemon_types) == 1 + and (daemon_types[0] or '').strip().lower() == 'osd' + ) + if not osd_only: + raise OrchestratorError( + 'Bucket parameters for OSD upgrade require --daemon-types to be "osd"') + @staticmethod def is_mon_error_for_invalid_bucket(err: MonCommandFailed) -> bool: """