From: Sebastian Wagner Date: Thu, 26 Mar 2020 13:30:37 +0000 (+0100) Subject: mgr/cephadm: Daemons running of unreachable servers are offline X-Git-Tag: wip-pdonnell-testing-20200918.022351~1567^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=adca3e08e8e9f4a683f7c6cef8e96705cc340188;p=ceph-ci.git mgr/cephadm: Daemons running of unreachable servers are offline Fixes: https://tracker.ceph.com/issues/44602 Signed-off-by: Sebastian Wagner --- diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 1c4315874c2..19cc28034d3 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -2,7 +2,7 @@ import json import errno import logging import time -import yaml +from copy import copy from threading import Event from functools import wraps @@ -320,6 +320,18 @@ class HostCache(): r.append(dd) return r + def get_daemons_with_volatile_status(self) -> Iterator[Tuple[str, Dict[str, orchestrator.DaemonDescription]]]: + for host, dm in self.daemons.items(): + if host in self.mgr.offline_hosts: + def set_offline(dd: orchestrator.DaemonDescription) -> orchestrator.DaemonDescription: + ret = copy(dd) + ret.status = -1 + ret.status_desc = 'host is offline' + return ret + yield host, {name: set_offline(d) for name, d in dm.items()} + else: + yield host, dm + def get_daemons_by_service(self, service_name): # type: (str) -> List[orchestrator.DaemonDescription] result = [] # type: List[orchestrator.DaemonDescription] @@ -1854,7 +1866,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): self._refresh_host_daemons(host) # sm = {} # type: Dict[str, orchestrator.ServiceDescription] - for h, dm in self.cache.daemons.items(): + for h, dm in self.cache.get_daemons_with_volatile_status(): for name, dd in dm.items(): if service_type and service_type != dd.daemon_type: continue @@ -1919,7 +1931,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): for hostname, hi in self.inventory.items(): self._refresh_host_daemons(hostname) result = [] - for h, dm in self.cache.daemons.items(): + for h, dm in self.cache.get_daemons_with_volatile_status(): if host and h != host: continue for name, dd in dm.items(): diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index a02572a074e..df9b9408220 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -1323,6 +1323,10 @@ class DaemonDescription(object): c[k] = datetime.datetime.strptime(c[k], DATEFMT) return cls(**c) + def __copy__(self): + # feel free to change this: + return DaemonDescription.from_json(self.to_json()) + class ServiceDescription(object): """ For responding to queries about the status of a particular service, diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index d3dbf645fdf..97a8a496283 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -25,7 +25,7 @@ from ._interface import OrchestratorClientMixin, DeviceLightLoc, _cli_read_comma raise_if_exception, _cli_write_command, TrivialReadCompletion, OrchestratorError, \ NoOrchestrator, OrchestratorValidationError, NFSServiceSpec, \ RGWSpec, InventoryFilter, InventoryHost, HostSpec, CLICommandMeta, \ - ServiceDescription, IscsiServiceSpec + ServiceDescription, DaemonDescription, IscsiServiceSpec def nice_delta(now, t, suffix=''): if t: @@ -410,7 +410,7 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule): refresh=refresh) self._orchestrator_wait([completion]) raise_if_exception(completion) - daemons = completion.result + daemons: List[DaemonDescription] = completion.result def ukn(s): return '' if s is None else s @@ -432,12 +432,15 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule): table.left_padding_width = 0 table.right_padding_width = 2 for s in sorted(daemons, key=lambda s: s.name()): - status = { - -1: 'error', - 0: 'stopped', - 1: 'running', - None: '' - }[s.status] + if s.status_desc: + status = s.status_desc + else: + status = { + -1: 'error', + 0: 'stopped', + 1: 'running', + None: '' + }[s.status] if s.status == 1 and s.started: status += ' (%s)' % to_pretty_timedelta(now - s.started)