From: Guillaume Abrioux
Date: Tue, 5 Mar 2024 10:05:18 +0000 (+0000)
Subject: mgr/node-proxy: handle 'None' statuses returned by RedFish
X-Git-Tag: v20.0.0~2462^2
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=cc279a3dd23d9219e508b80512f363a4ae577392;p=ceph.git

mgr/node-proxy: handle 'None' statuses returned by RedFish

It looks like RedFish might return 'None' values for some attributes. For instance:

```
[root@ceph-node-01 ~]# curl -s -k -X GET https://169.254.1.1/redfish/v1/Systems/System.Embedded.1/Storage/AHCI.SL.6-1/Drives/Disk.Direct.0-0:AHCI.SL.6-1 -H "X-Auth-Token: 3264251c28191fa5e7c9ebec49ef90fc" | jq .Status
{
  "Health": "OK",
  "HealthRollup": "OK",
  "State": "Enabled"
}
[root@ceph-node-01 ~]# curl -s -k -X GET https://169.254.1.1/redfish/v1/Systems/System.Embedded.1/Storage/NonRAID.Slot.2-1/Drives/Disk.Bay.0:Enclosure.Internal.0-1:NonRAID.Slot.2-1 -H "X-Auth-Token: 3264251c28191fa5e7c9ebec49ef90fc" | jq .Status
{
  "Health": null,
  "HealthRollup": null,
  "State": "Enabled"
}
[root@ceph-node-01 ~]#
```

Although this seems to be a bug in RedFish, we need to handle this case when it happens; otherwise, it makes the mgr orchestrator module throw an error. The idea here is to create a new status, "unknown", for when we can't fetch the real status of a component.

Fixes: https://tracker.ceph.com/issues/64712 Signed-off-by: Guillaume Abrioux --- diff --git a/src/ceph-node-proxy/ceph_node_proxy/util.py b/src/ceph-node-proxy/ceph_node_proxy/util.py index f6ed0fb483d6d..677161c63fd31 100644 --- a/src/ceph-node-proxy/ceph_node_proxy/util.py +++ b/src/ceph-node-proxy/ceph_node_proxy/util.py @@ -126,6 +126,8 @@ def normalize_dict(test_dict: Dict) -> Dict: if isinstance(test_dict[key], dict): res[key.lower()] = normalize_dict(test_dict[key]) else: + if test_dict[key] is None: + test_dict[key] = 'unknown' res[key.lower()] = test_dict[key] return res diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index 235737ef10e76..966ffc0461c85 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -8,7 +8,7 @@ import logging import math import socket from typing import TYPE_CHECKING, Dict, List, Iterator, Optional, Any, Tuple, Set, Mapping, cast, \ - NamedTuple, Type + NamedTuple, Type, ValuesView import orchestrator from ceph.deployment import inventory @@ -1485,10 +1485,12 @@ class NodeProxyCache: """ hostname = kw.get('hostname') hosts = [hostname] if hostname else self.data.keys() - mapper: Dict[bool, str] = { - True: 'error', - False: 'ok' - } + + def is_unknown(statuses: ValuesView) -> bool: + return any([status['status']['health'].lower() == 'unknown' for status in statuses]) and not is_error(statuses) + + def is_error(statuses: ValuesView) -> bool: + return any([status['status']['health'].lower() == 'error' for status in statuses]) _result: Dict[str, Any] = {} @@ -1496,9 +1498,15 @@ class NodeProxyCache: _result[host] = {} _result[host]['status'] = {} data = self.data[host] - for component, details in data['status'].items(): - res = any([member['status']['health'].lower() != 'ok' for member in data['status'][component].values()]) - _result[host]['status'][component] = mapper[res] + for component in data['status'].keys(): + values = 
data['status'][component].values() + if is_error(values): + state = 'error' + elif is_unknown(values): + state = 'unknown' + else: + state = 'ok' + _result[host]['status'][component] = state _result[host]['sn'] = data['sn'] _result[host]['host'] = data['host'] _result[host]['firmwares'] = data['firmwares']