From: Sebastian Wagner Date: Thu, 9 Sep 2021 10:30:42 +0000 (+0200) Subject: mgr/orch: Add DaemonDescriptionStatus `starting` and `unknown` X-Git-Tag: v16.2.8~273^2~25 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0cbc806d68eb2078e7a078ee0e9b62cca668582b;p=ceph.git mgr/orch: Add DaemonDescriptionStatus `starting` and `unknown` Signed-off-by: Sebastian Wagner (cherry picked from commit 501ecf035e982af6f2020b3bb77d0ac505313b73) --- diff --git a/doc/mgr/orchestrator_modules.rst b/doc/mgr/orchestrator_modules.rst index 5991afe61365..a28b43059d21 100644 --- a/doc/mgr/orchestrator_modules.rst +++ b/doc/mgr/orchestrator_modules.rst @@ -252,6 +252,9 @@ Daemons .. automethod:: Orchestrator.remove_daemons .. automethod:: Orchestrator.daemon_action +.. autoclass:: DaemonDescription +.. autoclass:: DaemonDescriptionStatus + OSD management -------------- diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index 1f77898e2511..85588f381f38 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -763,7 +763,6 @@ class HostCache(): # We do not refresh daemons on hosts in maintenance mode, so stored daemon statuses # could be wrong. 
We must assume maintenance is working and daemons are stopped dd.status = orchestrator.DaemonDescriptionStatus.stopped - dd.status_desc = 'stopped' dd.events = self.mgr.events.get_for_daemon(dd.name()) return dd diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index e50c0070f808..adea058d7766 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -1095,7 +1095,7 @@ class CephadmServe: # prime cached service state with what we (should have) # just created sd = daemon_spec.to_daemon_description( - DaemonDescriptionStatus.running, 'starting') + DaemonDescriptionStatus.starting, 'starting') self.mgr.cache.add_daemon(daemon_spec.host, sd) if daemon_spec.daemon_type in REQUIRES_POST_ACTIONS: self.mgr.requires_post_actions.add(daemon_spec.name()) diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py index 6a31395291fa..3a3d7c139063 100644 --- a/src/pybind/mgr/cephadm/tests/fixtures.py +++ b/src/pybind/mgr/cephadm/tests/fixtures.py @@ -11,7 +11,7 @@ except ImportError: pass from cephadm import CephadmOrchestrator -from orchestrator import raise_if_exception, OrchResult, HostSpec +from orchestrator import raise_if_exception, OrchResult, HostSpec, DaemonDescriptionStatus from tests import mock @@ -105,7 +105,7 @@ def assert_rm_service(cephadm: CephadmOrchestrator, srv_name): @contextmanager -def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None, host: str = '') -> Iterator[List[str]]: +def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None, host: str = '', status_running=False) -> Iterator[List[str]]: if spec.placement.is_empty() and host: spec.placement = PlacementSpec(hosts=[host], count=1) if meth is not None: @@ -120,6 +120,9 @@ def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=No CephadmServe(cephadm_module)._apply_all_services() + if status_running: + 
make_daemons_running(cephadm_module, spec.service_name()) + dds = wait(cephadm_module, cephadm_module.list_daemons()) own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()] if host: @@ -130,6 +133,12 @@ def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=No assert_rm_service(cephadm_module, spec.service_name()) +def make_daemons_running(cephadm_module, service_name): + own_dds = cephadm_module.cache.get_daemons_by_service(service_name) + for dd in own_dds: + dd.status = DaemonDescriptionStatus.running # We're changing the reference + + def _deploy_cephadm_binary(host): def foo(*args, **kwargs): return True diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 22113eebd554..d20787a311d5 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -23,7 +23,7 @@ from orchestrator import DaemonDescription, InventoryHost, \ HostSpec, OrchestratorError, DaemonDescriptionStatus, OrchestratorEvent from tests import mock from .fixtures import wait, _run_cephadm, match_glob, with_host, \ - with_cephadm_module, with_service, _deploy_cephadm_binary + with_cephadm_module, with_service, _deploy_cephadm_binary, make_daemons_running from cephadm.module import CephadmOrchestrator """ @@ -159,14 +159,16 @@ class TestCephadm(object): 'service_name': 'mds.name', 'daemon_type': 'mds', 'hostname': 'test', - 'status': 1, + 'status': 2, 'status_desc': 'starting', 'is_active': False, 'ports': [], } ] - with with_service(cephadm_module, ServiceSpec('rgw', 'r.z'), CephadmOrchestrator.apply_rgw, 'test'): + with with_service(cephadm_module, ServiceSpec('rgw', 'r.z'), + CephadmOrchestrator.apply_rgw, 'test', status_running=True): + make_daemons_running(cephadm_module, 'mds.name') c = cephadm_module.describe_service() out = [dict(o.to_json()) for o in wait(cephadm_module, c)] @@ -200,8 +202,10 @@ class TestCephadm(object): def 
test_service_ls_service_type_flag(self, cephadm_module): with with_host(cephadm_module, 'host1'): with with_host(cephadm_module, 'host2'): - with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), CephadmOrchestrator.apply_mgr, ''): - with with_service(cephadm_module, ServiceSpec('mds', 'test-id', placement=PlacementSpec(count=2)), CephadmOrchestrator.apply_mds, ''): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), + CephadmOrchestrator.apply_mgr, '', status_running=True): + with with_service(cephadm_module, ServiceSpec('mds', 'test-id', placement=PlacementSpec(count=2)), + CephadmOrchestrator.apply_mds, '', status_running=True): # with no service-type. Should provide info fot both services c = cephadm_module.describe_service() @@ -917,13 +921,15 @@ class TestCephadm(object): with with_host(cephadm_module, 'host2'): with with_osd_daemon(cephadm_module, _run_cephadm, 'host1', 1) as dd1: # type: DaemonDescription with with_osd_daemon(cephadm_module, _run_cephadm, 'host2', 1) as dd2: # type: DaemonDescription + CephadmServe(cephadm_module)._check_for_moved_osds() + # both are in status "starting" + assert len(cephadm_module.cache.get_daemons()) == 2 + dd1.status = DaemonDescriptionStatus.running dd2.status = DaemonDescriptionStatus.error cephadm_module.cache.update_host_daemons(dd1.hostname, {dd1.name(): dd1}) cephadm_module.cache.update_host_daemons(dd2.hostname, {dd2.name(): dd2}) - CephadmServe(cephadm_module)._check_for_moved_osds() - assert len(cephadm_module.cache.get_daemons()) == 1 assert cephadm_module.events.get_for_daemon('osd.1') == [ diff --git a/src/pybind/mgr/cephadm/tests/test_upgrade.py b/src/pybind/mgr/cephadm/tests/test_upgrade.py index df1cd3b563e7..3699b6eb75e7 100644 --- a/src/pybind/mgr/cephadm/tests/test_upgrade.py +++ b/src/pybind/mgr/cephadm/tests/test_upgrade.py @@ -15,7 +15,7 @@ from .fixtures import _run_cephadm, wait, with_host, with_service def 
test_upgrade_start(cephadm_module: CephadmOrchestrator): with with_host(cephadm_module, 'test'): with with_host(cephadm_module, 'test2'): - with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2))): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), status_running=True): assert wait(cephadm_module, cephadm_module.upgrade_start( 'image_id', None)) == 'Initiating upgrade to docker.io/image_id' @@ -43,7 +43,8 @@ def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator): with with_host(cephadm_module, 'host2'): cephadm_module.set_container_image('global', 'from_image') cephadm_module.use_repo_digest = use_repo_digest - with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*', count=2)), CephadmOrchestrator.apply_mgr, ''),\ + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*', count=2)), + CephadmOrchestrator.apply_mgr, '', status_running=True),\ mock.patch("cephadm.module.CephadmOrchestrator.lookup_release_name", return_value='foo'),\ mock.patch("cephadm.module.CephadmOrchestrator.version", diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index efaa7dfc3d3c..149a7dd72c4f 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -774,9 +774,23 @@ def handle_type_error(method: FuncT) -> FuncT: class DaemonDescriptionStatus(enum.IntEnum): + unknown = -2 error = -1 stopped = 0 running = 1 + starting = 2 #: Daemon is deployed, but not yet running + + @staticmethod + def to_str(status: Optional['DaemonDescriptionStatus']) -> str: + if status is None: + status = DaemonDescriptionStatus.unknown + return { + DaemonDescriptionStatus.unknown: 'unknown', + DaemonDescriptionStatus.error: 'error', + DaemonDescriptionStatus.stopped: 'stopped', + DaemonDescriptionStatus.running: 'running', + DaemonDescriptionStatus.starting: 
'starting', + }.get(status, '') class DaemonDescription(object): @@ -822,7 +836,7 @@ class DaemonDescription(object): rank_generation: Optional[int] = None, ) -> None: - # Host is at the same granularity as InventoryHost + #: Host is at the same granularity as InventoryHost self.hostname: Optional[str] = hostname # Not everyone runs in containers, but enough people do to @@ -833,32 +847,32 @@ class DaemonDescription(object): self.container_image_name = container_image_name # image friendly name self.container_image_digests = container_image_digests # reg hashes - # The type of service (osd, mon, mgr, etc.) + #: The type of service (osd, mon, mgr, etc.) self.daemon_type = daemon_type - # The orchestrator will have picked some names for daemons, - # typically either based on hostnames or on pod names. - # This is the <foo> in mds.<foo>, the ID that will appear - # in the FSMap/ServiceMap. + #: The orchestrator will have picked some names for daemons, + #: typically either based on hostnames or on pod names. + #: This is the <foo> in mds.<foo>, the ID that will appear + #: in the FSMap/ServiceMap. self.daemon_id: Optional[str] = daemon_id self.daemon_name = self.name() - # Some daemon types have a numeric rank assigned + #: Some daemon types have a numeric rank assigned self.rank: Optional[int] = rank self.rank_generation: Optional[int] = rank_generation self._service_name: Optional[str] = service_name - # Service version that was deployed + #: Service version that was deployed self.version = version - # Service status: -1 error, 0 stopped, 1 running - self.status = status + # Service status: -2 unknown, -1 error, 0 stopped, 1 running, 2 starting + self._status = status - # Service status description when status == error. + #: Service status description when status == error. 
self.status_desc = status_desc - # datetime when this info was last refreshed + #: datetime when this info was last refreshed self.last_refresh: Optional[datetime.datetime] = last_refresh self.created: Optional[datetime.datetime] = created @@ -866,7 +880,7 @@ class DaemonDescription(object): self.last_configured: Optional[datetime.datetime] = last_configured self.last_deployed: Optional[datetime.datetime] = last_deployed - # Affinity to a certain OSDSpec + #: Affinity to a certain OSDSpec self.osdspec_affinity: Optional[str] = osdspec_affinity self.events: List[OrchestratorEvent] = events or [] @@ -882,6 +896,15 @@ class DaemonDescription(object): self.is_active = is_active + @property + def status(self) -> Optional[DaemonDescriptionStatus]: + return self._status + + @status.setter + def status(self, new: DaemonDescriptionStatus) -> None: + self._status = new + self.status_desc = DaemonDescriptionStatus.to_str(new) + def get_port_summary(self) -> str: if not self.ports: return '' diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index 7d03cdceba25..ff3765e515cf 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -662,13 +662,8 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule, if s.status_desc: status = s.status_desc else: - status = { - DaemonDescriptionStatus.error: 'error', - DaemonDescriptionStatus.stopped: 'stopped', - DaemonDescriptionStatus.running: 'running', - None: '' - }[s.status] - if s.status == DaemonDescriptionStatus.running and s.started: + status = DaemonDescriptionStatus.to_str(s.status) + if s.status == DaemonDescriptionStatus.running and s.started: # See DDS.starting status += ' (%s)' % to_pretty_timedelta(now - s.started) table.add_row(( diff --git a/src/pybind/mgr/rook/module.py b/src/pybind/mgr/rook/module.py index 964d5548ada8..70512567ab4c 100644 --- a/src/pybind/mgr/rook/module.py +++ b/src/pybind/mgr/rook/module.py @@ -370,14 +370,13 @@ class 
RookOrchestrator(MgrModule, orchestrator.Orchestrator): sd.hostname = p['hostname'] sd.daemon_type = p['labels']['app'].replace('rook-ceph-', '') status = { - 'Pending': orchestrator.DaemonDescriptionStatus.error, + 'Pending': orchestrator.DaemonDescriptionStatus.starting, 'Running': orchestrator.DaemonDescriptionStatus.running, 'Succeeded': orchestrator.DaemonDescriptionStatus.stopped, 'Failed': orchestrator.DaemonDescriptionStatus.error, - 'Unknown': orchestrator.DaemonDescriptionStatus.error, + 'Unknown': orchestrator.DaemonDescriptionStatus.unknown, }[p['phase']] sd.status = status - sd.status_desc = p['phase'] if 'ceph_daemon_id' in p['labels']: sd.daemon_id = p['labels']['ceph_daemon_id']