From 919fb3b4a276242e6d1540bc15e66c685b1c92b4 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 26 Mar 2020 14:30:37 +0100 Subject: [PATCH] mgr/cephadm: Daemons running of unreachable servers are offline Fixes: https://tracker.ceph.com/issues/44602 Signed-off-by: Sebastian Wagner (cherry picked from commit adca3e08e8e9f4a683f7c6cef8e96705cc340188) --- src/pybind/mgr/cephadm/module.py | 18 +++++++++++++++--- src/pybind/mgr/orchestrator/_interface.py | 4 ++++ src/pybind/mgr/orchestrator/module.py | 19 +++++++++++-------- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index d7b6b0f76509b..b730888e5c4eb 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -2,7 +2,7 @@ import json import errno import logging import time -import yaml +from copy import copy from threading import Event from functools import wraps @@ -320,6 +320,18 @@ class HostCache(): r.append(dd) return r + def get_daemons_with_volatile_status(self) -> Iterator[Tuple[str, Dict[str, orchestrator.DaemonDescription]]]: + for host, dm in self.daemons.items(): + if host in self.mgr.offline_hosts: + def set_offline(dd: orchestrator.DaemonDescription) -> orchestrator.DaemonDescription: + ret = copy(dd) + ret.status = -1 + ret.status_desc = 'host is offline' + return ret + yield host, {name: set_offline(d) for name, d in dm.items()} + else: + yield host, dm + def get_daemons_by_service(self, service_name): # type: (str) -> List[orchestrator.DaemonDescription] result = [] # type: List[orchestrator.DaemonDescription] @@ -1854,7 +1866,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): self._refresh_host_daemons(host) # sm = {} # type: Dict[str, orchestrator.ServiceDescription] - for h, dm in self.cache.daemons.items(): + for h, dm in self.cache.get_daemons_with_volatile_status(): for name, dd in dm.items(): if service_type and service_type != dd.daemon_type: continue @@ -1925,7 +1937,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): for hostname, hi in self.inventory.items(): self._refresh_host_daemons(hostname) result = [] - for h, dm in self.cache.daemons.items(): + for h, dm in self.cache.get_daemons_with_volatile_status(): if host and h != host: continue for name, dd in dm.items(): diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index a02572a074e3c..df9b94082202c 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -1323,6 +1323,10 @@ class DaemonDescription(object): c[k] = datetime.datetime.strptime(c[k], DATEFMT) return cls(**c) + def __copy__(self): + # feel free to change this: + return DaemonDescription.from_json(self.to_json()) + class ServiceDescription(object): """ For responding to queries about the status of a particular service, diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index d3dbf645fdfad..97a8a4962830a 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -25,7 +25,7 @@ from ._interface import OrchestratorClientMixin, DeviceLightLoc, _cli_read_comma raise_if_exception, _cli_write_command, TrivialReadCompletion, OrchestratorError, \ NoOrchestrator, OrchestratorValidationError, NFSServiceSpec, \ RGWSpec, InventoryFilter, InventoryHost, HostSpec, CLICommandMeta, \ - ServiceDescription, IscsiServiceSpec + ServiceDescription, DaemonDescription, IscsiServiceSpec def nice_delta(now, t, suffix=''): if t: @@ -410,7 +410,7 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule): refresh=refresh) self._orchestrator_wait([completion]) raise_if_exception(completion) - daemons = completion.result + daemons: List[DaemonDescription] = completion.result def ukn(s): return '' if s is None else s @@ -432,12 +432,15 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule): table.left_padding_width = 0 table.right_padding_width = 2 for s in sorted(daemons, key=lambda s: s.name()): - status = { - -1: 'error', - 0: 'stopped', - 1: 'running', - None: '' - }[s.status] + if s.status_desc: + status = s.status_desc + else: + status = { + -1: 'error', + 0: 'stopped', + 1: 'running', + None: '' + }[s.status] if s.status == 1 and s.started: status += ' (%s)' % to_pretty_timedelta(now - s.started) -- 2.39.5