From: Ashwin M. Joshi Date: Fri, 24 Apr 2026 09:14:06 +0000 (+0530) Subject: mgr: ok-to-upgrade added code comments for flow clarity X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=afb252a164a05e38ce6c66ef6780369d7bb870da;p=ceph.git mgr: ok-to-upgrade added code comments for flow clarity Fixes: https://tracker.ceph.com/issues/75603 Signed-off-by: Ashwin M. Joshi --- diff --git a/src/pybind/mgr/cephadm/tests/test_upgrade.py b/src/pybind/mgr/cephadm/tests/test_upgrade.py index dbe37e53df6a..c008abdf980c 100644 --- a/src/pybind/mgr/cephadm/tests/test_upgrade.py +++ b/src/pybind/mgr/cephadm/tests/test_upgrade.py @@ -314,13 +314,18 @@ def test_ok_to_upgrade_mon_report_from_parsed_body(): assert rep.mon_resp_as_dict()['bad_no_version'] == [5] -def test_ok_to_upgrade_mon_report_non_list_osd_array_becomes_empty(): +def test_ok_to_upgrade_mon_report_non_list_osd_array_becomes_empty(caplog): + caplog.set_level(logging.WARNING, logger='cephadm.upgrade') rep = OkToUpgradeMonReport.from_parsed_body({ 'ok_to_upgrade': True, 'all_osds_upgraded': False, 'osds_ok_to_upgrade': 'not-a-list', }) assert rep.osds_ok_to_upgrade == [] + assert any( + 'expected list of osd ids' in r.getMessage() + for r in caplog.records + ) def test_ok_to_upgrade_mon_report_matches_mgr_json_formatter_shape(): diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index 15773447df98..f2dbc1957572 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -4,7 +4,7 @@ import logging import time import uuid from dataclasses import dataclass, field, asdict -from typing import TYPE_CHECKING, Optional, Dict, List, Tuple, Any, Mapping, cast, Set +from typing import TYPE_CHECKING, Optional, Dict, List, Tuple, Any, cast, Set from cephadm.services.service_registry import service_registry import orchestrator @@ -60,7 +60,7 @@ def normalize_image_digest(digest: str, default_registry: str) -> str: return digest -def _get_boolean_values_from_mon_json(value: Any) -> Optional[bool]: +def _get_bool_value_from_mon_json(value: Any) -> Optional[bool]: """Handle only JSON booleans for ``ok_to_upgrade`` / ``all_osds_upgraded``.""" if isinstance(value, bool): return value @@ -79,6 +79,12 @@ def _get_osd_ids_from_mon_json(value: Any) -> List[int]: """ if isinstance(value, list): return list(value) + if value is not None: + logger.warning( + 'osd ok-to-upgrade: expected list of osd ids, got %s: %r', + type(value), + value, + ) return [] @@ -89,8 +95,12 @@ class OkToUpgradeMonReport: parse_ok_to_upgrade_mon_json unwraps the top-level ok_to_upgrade object. Field names match the keys in the inner report object from the mon JSON. - (ok_to_upgrade, all_osds_upgraded, osds_in_crush_bucket, - osds_ok_to_upgrade, osds_upgraded, bad_no_version). + ok_to_upgrade: : JSON bool; True if mon found a safe batch of OSDs to upgrade. + all_osds_upgraded: : JSON bool; True if every bucket OSD already on target ceph_version. + osds_ok_to_upgrade: List[int] : OSD ids safe to upgrade this step + osds_in_crush_bucket: List[int] : OSD ids under the named CRUSH bucket + osds_upgraded: List[int] : OSD ids already matching target ceph_version_short in metadata. + bad_no_version: List[int] : OSD ids with missing ceph_version_short in mgr metadata. """ ok_to_upgrade: Optional[bool] @@ -102,13 +112,13 @@ class OkToUpgradeMonReport: @classmethod def from_parsed_body(cls, body: Any) -> 'OkToUpgradeMonReport': - if not isinstance(body, Mapping): + if not isinstance(body, dict): raise ValueError( f'osd ok-to-upgrade: expected JSON object after unwrap, got {type(body)!r}') b = dict(body) return cls( - ok_to_upgrade=_get_boolean_values_from_mon_json(b.get('ok_to_upgrade')), - all_osds_upgraded=_get_boolean_values_from_mon_json(b.get('all_osds_upgraded')), + ok_to_upgrade=_get_bool_value_from_mon_json(b.get('ok_to_upgrade')), + all_osds_upgraded=_get_bool_value_from_mon_json(b.get('all_osds_upgraded')), osds_ok_to_upgrade=_get_osd_ids_from_mon_json(b.get('osds_ok_to_upgrade')), osds_in_crush_bucket=_get_osd_ids_from_mon_json(b.get('osds_in_crush_bucket')), osds_upgraded=_get_osd_ids_from_mon_json(b.get('osds_upgraded')), @@ -266,8 +276,10 @@ class CephadmUpgrade: else: self.upgrade_state = None self.upgrade_info_str: str = '' - # Set during _to_upgrade when last osd ok-to-upgrade call reported all bucket OSDs - # on target version (no batch ids); used for logging and to skip repeat mon RPCs. + # Set during _to_upgrade when last osd ok-to-upgrade reported all bucket OSDs + # on target version (all_osds_upgraded=True). For OSDs still in need_upgrade for an + # image/digest mismatch, this helps the code fall back to ok-to-stop for + # per-daemon PG safety. self._ok_to_upgrade_all_osds_upgraded: bool = False # osd. names under the upgrade CRUSH bucket from the last osd ok-to-upgrade # report (``osds_in_crush_bucket``). Used so ok-to-stop ``known`` (cluster-wide)