From ae83ffc7897a0912308001f55876fd51dd56ad40 Mon Sep 17 00:00:00 2001 From: Daniel Pivonka Date: Thu, 28 Jan 2021 16:36:11 -0500 Subject: [PATCH] mgr/cephadm: add DaemonDescriptionStatus Signed-off-by: Daniel Pivonka Signed-off-by: Sebastian Wagner --- src/pybind/mgr/cephadm/inventory.py | 2 +- src/pybind/mgr/cephadm/module.py | 4 ++-- src/pybind/mgr/cephadm/serve.py | 19 ++++++++++--------- .../mgr/cephadm/services/cephadmservice.py | 8 +++++--- src/pybind/mgr/orchestrator/__init__.py | 2 +- src/pybind/mgr/orchestrator/_interface.py | 17 +++++++++++++---- src/pybind/mgr/orchestrator/module.py | 10 +++++----- .../orchestrator/tests/test_orchestrator.py | 15 ++++++++++----- src/pybind/mgr/rook/module.py | 10 +++++----- 9 files changed, 52 insertions(+), 35 deletions(-) diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index f54d3f2468b..468857db37b 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -518,7 +518,7 @@ class HostCache(): def alter(host: str, dd_orig: orchestrator.DaemonDescription) -> orchestrator.DaemonDescription: dd = copy(dd_orig) if host in self.mgr.offline_hosts: - dd.status = -1 + dd.status = orchestrator.DaemonDescriptionStatus.error dd.status_desc = 'host is offline' dd.events = self.mgr.events.get_for_daemon(dd.name()) return dd diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 4085f6542d9..9dbdfb8d13f 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -34,7 +34,7 @@ from mgr_util import create_self_signed_cert import secrets import orchestrator from orchestrator import OrchestratorError, OrchestratorValidationError, HostSpec, \ - CLICommandMeta, DaemonDescription + CLICommandMeta, DaemonDescription, DaemonDescriptionStatus from orchestrator._interface import GenericSpec from orchestrator._interface import daemon_type_to_service @@ -1532,7 +1532,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, sm[n].rados_config_location = spec.rados_config_location() else: sm[n].size = 0 - if dd.status == 1: + if dd.status == DaemonDescriptionStatus.running: sm[n].running += 1 if not sm[n].last_refresh or not dd.last_refresh or dd.last_refresh < sm[n].last_refresh: # type: ignore sm[n].last_refresh = dd.last_refresh diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index d253f5e67ea..cd8b76a5749 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -19,12 +19,12 @@ from ceph.deployment.service_spec import ServiceSpec, HostPlacementSpec, \ from ceph.utils import str_to_datetime, datetime_now import orchestrator -from orchestrator import OrchestratorError, set_exception_subject, OrchestratorEvent +from orchestrator import OrchestratorError, set_exception_subject, OrchestratorEvent, \ + DaemonDescriptionStatus, daemon_type_to_service, service_to_daemon_types from cephadm.services.cephadmservice import CephadmDaemonDeploySpec from cephadm.schedule import HostAssignment from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \ CephadmNoImage, CEPH_UPGRADE_ORDER, ContainerInspectInfo -from orchestrator._interface import daemon_type_to_service, service_to_daemon_types if TYPE_CHECKING: from cephadm.module import CephadmOrchestrator @@ -206,7 +206,7 @@ class CephadmServe: health_changed = True failed_daemons = [] for dd in self.mgr.cache.get_daemons(): - if dd.status is not None and dd.status < 0: + if dd.status is not None and dd.status == DaemonDescriptionStatus.error: failed_daemons.append('daemon %s on %s is in %s state' % ( dd.name(), dd.hostname, dd.status_desc )) @@ -277,10 +277,10 @@ class CephadmServe: if 'state' in d: sd.status_desc = d['state'] sd.status = { - 'running': 1, - 'stopped': 0, - 'error': -1, - 'unknown': -1, + 'running': DaemonDescriptionStatus.running, + 'stopped': DaemonDescriptionStatus.stopped, + 'error': DaemonDescriptionStatus.error, + 'unknown': DaemonDescriptionStatus.error, }[d['state']] else: sd.status_desc = 'unknown' @@ -812,7 +812,8 @@ class CephadmServe: if not code and daemon_spec.host in self.mgr.cache.daemons: # prime cached service state with what we (should have) # just created - sd = daemon_spec.to_daemon_description(1, 'starting') + sd = daemon_spec.to_daemon_description( + DaemonDescriptionStatus.running, 'starting') self.mgr.cache.add_daemon(daemon_spec.host, sd) if daemon_spec.daemon_type in ['grafana', 'iscsi', 'prometheus', 'alertmanager']: self.mgr.requires_post_actions.add(daemon_spec.daemon_type) @@ -833,7 +834,7 @@ class CephadmServe: if not reconfig: # we have to clean up the daemon. E.g. keyrings. servict_type = daemon_type_to_service(daemon_spec.daemon_type) - dd = daemon_spec.to_daemon_description(-1, 'failed') + dd = daemon_spec.to_daemon_description(DaemonDescriptionStatus.error, 'failed') self.mgr.cephadm_services[servict_type].post_remove(dd) raise diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index a676728ab5d..a481f70299f 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -11,7 +11,7 @@ from mgr_module import HandleCommandResult, MonCommandFailed from ceph.deployment.service_spec import ServiceSpec, RGWSpec from ceph.deployment.utils import is_ipv6, unwrap_ipv6 -from orchestrator import OrchestratorError, DaemonDescription +from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus from orchestrator._interface import daemon_type_to_service from cephadm import utils from mgr_util import ServerConfigException, verify_tls @@ -76,7 +76,7 @@ class CephadmDaemonDeploySpec: return files - def to_daemon_description(self, status: int, status_desc: str) -> DaemonDescription: + def to_daemon_description(self, status: DaemonDescriptionStatus, status_desc: str) -> DaemonDescription: return DaemonDescription( daemon_type=self.daemon_type, daemon_id=self.daemon_id, @@ -249,7 +249,9 @@ class CephadmService(metaclass=ABCMeta): # Provides a warning about if it possible or not to stop daemons in a service names = [f'{daemon_type}.{d_id}' for d_id in daemon_ids] number_of_running_daemons = len( - [daemon for daemon in self.mgr.cache.get_daemons_by_type(daemon_type) if daemon.status == 1]) + [daemon + for daemon in self.mgr.cache.get_daemons_by_type(daemon_type) + if daemon.status == DaemonDescriptionStatus.running]) if (number_of_running_daemons - len(daemon_ids)) >= low_limit: return False, f'It is presumed safe to stop {names}' diff --git a/src/pybind/mgr/orchestrator/__init__.py b/src/pybind/mgr/orchestrator/__init__.py index 367f7b55d42..63f93092f2c 100644 --- a/src/pybind/mgr/orchestrator/__init__.py +++ b/src/pybind/mgr/orchestrator/__init__.py @@ -9,7 +9,7 @@ from ._interface import \ Orchestrator, OrchestratorClientMixin, \ OrchestratorValidationError, OrchestratorError, NoOrchestrator, \ ServiceDescription, InventoryFilter, HostSpec, \ - DaemonDescription, \ + DaemonDescription, DaemonDescriptionStatus, \ OrchestratorEvent, set_exception_subject, \ InventoryHost, DeviceLightLoc, \ UpgradeStatusSpec, daemon_type_to_service, service_to_daemon_types diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 7b5f03ddfdd..e7dd12e6f27 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -7,6 +7,7 @@ Please see the ceph-mgr module developer's guide for more information. import copy import datetime +import enum import errno import logging import pickle @@ -1255,6 +1256,12 @@ def handle_type_error(method: FuncT) -> FuncT: return cast(FuncT, inner) +class DaemonDescriptionStatus(enum.IntEnum): + error = -1 + stopped = 0 + running = 1 + + class DaemonDescription(object): """ For responding to queries about the status of a particular daemon, @@ -1277,7 +1284,7 @@ class DaemonDescription(object): container_image_name: Optional[str] = None, container_image_digests: Optional[List[str]] = None, version: Optional[str] = None, - status: Optional[int] = None, + status: Optional[DaemonDescriptionStatus] = None, status_desc: Optional[str] = None, last_refresh: Optional[datetime.datetime] = None, created: Optional[datetime.datetime] = None, @@ -1316,7 +1323,7 @@ class DaemonDescription(object): # Service status: -1 error, 0 stopped, 1 running self.status = status - # Service status description when status == -1. + # Service status description when status == error. self.status_desc = status_desc # datetime when this info was last refreshed @@ -1416,7 +1423,7 @@ class DaemonDescription(object): out['container_image_name'] = self.container_image_name out['container_image_digests'] = self.container_image_digests out['version'] = self.version - out['status'] = self.status + out['status'] = self.status.value if self.status is not None else None out['status_desc'] = self.status_desc if self.daemon_type == 'osd': out['osdspec_affinity'] = self.osdspec_affinity @@ -1445,7 +1452,9 @@ class DaemonDescription(object): if k in c: c[k] = str_to_datetime(c[k]) events = [OrchestratorEvent.from_json(e) for e in event_strs] - return cls(events=events, **c) + status_int = c.pop('status', None) + status = DaemonDescriptionStatus(status_int) if status_int is not None else None + return cls(events=events, status=status, **c) def __copy__(self) -> 'DaemonDescription': # feel free to change this: diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index a5d36cd01b8..ed1f75da50a 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -21,7 +21,7 @@ from ._interface import OrchestratorClientMixin, DeviceLightLoc, _cli_read_comma NoOrchestrator, OrchestratorValidationError, NFSServiceSpec, \ RGWSpec, InventoryFilter, InventoryHost, HostSpec, CLICommandMeta, \ ServiceDescription, DaemonDescription, IscsiServiceSpec, json_to_generic_spec, \ - GenericSpec + GenericSpec, DaemonDescriptionStatus def nice_delta(now: datetime.datetime, t: Optional[datetime.datetime], suffix: str = '') -> str: @@ -648,12 +648,12 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule, status = s.status_desc else: status = { - -1: 'error', - 0: 'stopped', - 1: 'running', + DaemonDescriptionStatus.error: 'error', + DaemonDescriptionStatus.stopped: 'stopped', + DaemonDescriptionStatus.running: 'running', None: '' }[s.status] - if s.status == 1 and s.started: + if s.status == DaemonDescriptionStatus.running and s.started: status += ' (%s)' % to_pretty_timedelta(now - s.started) table.add_row(( diff --git a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py index 4f215f8e7ac..96ce7436b64 100644 --- a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py +++ b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py @@ -14,7 +14,7 @@ from test_orchestrator import TestOrchestrator as _TestOrchestrator from tests import mock from orchestrator import raise_if_exception, Completion, ProgressReference -from orchestrator import InventoryHost, DaemonDescription, ServiceDescription +from orchestrator import InventoryHost, DaemonDescription, ServiceDescription, DaemonDescriptionStatus from orchestrator import OrchestratorValidationError from orchestrator.module import to_format, Format, OrchestratorCli, preview_table_osd @@ -22,13 +22,14 @@ from orchestrator.module import to_format, Format, OrchestratorCli, preview_tabl def _test_resource(data, resource_class, extra=None): # ensure we can deserialize and serialize rsc = resource_class.from_json(data) - rsc.to_json() + assert rsc.to_json() == resource_class.from_json(rsc.to_json()).to_json() if extra: # if there is an unexpected data provided - data.update(extra) + data_copy = data.copy() + data_copy.update(extra) with pytest.raises(OrchestratorValidationError): - resource_class.from_json(data) + resource_class.from_json(data_copy) def test_inventory(): @@ -62,10 +63,14 @@ def test_daemon_description(): json_data = { 'hostname': 'test', 'daemon_type': 'mon', - 'daemon_id': 'a' + 'daemon_id': 'a', + 'status': -1, } _test_resource(json_data, DaemonDescription, {'abc': False}) + dd = DaemonDescription.from_json(json_data) + assert dd.status.value == DaemonDescriptionStatus.error.value + def test_raise(): c = Completion() diff --git a/src/pybind/mgr/rook/module.py b/src/pybind/mgr/rook/module.py index 1ad1682fd06..1db6818a339 100644 --- a/src/pybind/mgr/rook/module.py +++ b/src/pybind/mgr/rook/module.py @@ -428,11 +428,11 @@ class RookOrchestrator(MgrModule, orchestrator.Orchestrator): sd.hostname = p['hostname'] sd.daemon_type = p['labels']['app'].replace('rook-ceph-', '') status = { - 'Pending': -1, - 'Running': 1, - 'Succeeded': 0, - 'Failed': -1, - 'Unknown': -1, + 'Pending': orchestrator.DaemonDescriptionStatus.error, + 'Running': orchestrator.DaemonDescriptionStatus.running, + 'Succeeded': orchestrator.DaemonDescriptionStatus.stopped, + 'Failed': orchestrator.DaemonDescriptionStatus.error, + 'Unknown': orchestrator.DaemonDescriptionStatus.error, }[p['phase']] sd.status = status sd.status_desc = p['phase'] -- 2.39.5