From: Sage Weil Date: Fri, 26 Feb 2021 13:46:26 +0000 (-0500) Subject: mgr/cephadm: optional pass 'known' through to ok_to_stop X-Git-Tag: v16.2.0~106^2~106 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=7d3c850a789fb752db81bf58ed4bcd2c25fe2595;p=ceph.git mgr/cephadm: optional pass 'known' through to ok_to_stop Optionally provide a list of previously known-to-be-ok-to-stop items to the ok_to_stop method. This has to get plumbed through a zillion instances of this class method. No functional change (yet). Signed-off-by: Sage Weil (cherry picked from commit d9da769079a6a78efd1016af28e7bcf0b8ab830c) --- diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index abb160173cdec..25ba55e11d772 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1248,7 +1248,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, okay: bool = True for daemon_type, daemon_ids in daemon_map.items(): r = self.cephadm_services[daemon_type_to_service( - daemon_type)].ok_to_stop(daemon_ids, force) + daemon_type)].ok_to_stop(daemon_ids, force=force) if r.retval: okay = False # collect error notifications so user can see every daemon causing host diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index 8538a875651d0..bfe7ee2ee212b 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -222,7 +222,10 @@ class CephadmService(metaclass=ABCMeta): except MonCommandFailed as e: logger.warning('Failed to set Dashboard config for %s: %s', service_name, e) - def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult: + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids] out = f'It is presumed safe to stop {names}' err = f'It is NOT safe to stop {names}' @@ -544,7 +547,10 @@ class MgrService(CephService): num = len(mgr_map.get('standbys')) return bool(num) - def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult: + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: # ok to stop if there is more than 1 mgr and not trying to stop the active mgr warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Mgr', 1, True) @@ -830,7 +836,10 @@ class RgwService(CephService): raise OrchestratorError(err) self.mgr.log.info('updated period') - def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult: + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: # if load balancer (ha-rgw) is present block if only 1 daemon up otherwise ok # if no load balancer, warn if > 1 daemon, block if only 1 daemon def ha_rgw_present() -> bool: @@ -881,7 +890,10 @@ class RbdMirrorService(CephService): return daemon_spec - def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult: + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: # if only 1 rbd-mirror, alert user (this is not passable with --force) warn, warn_message = self._enough_daemons_to_stop( self.TYPE, daemon_ids, 'Rbdmirror', 1, True) diff --git a/src/pybind/mgr/cephadm/services/iscsi.py b/src/pybind/mgr/cephadm/services/iscsi.py index 84c7306479408..efa25430a4670 100644 --- a/src/pybind/mgr/cephadm/services/iscsi.py +++ b/src/pybind/mgr/cephadm/services/iscsi.py @@ -1,7 +1,7 @@ import errno import json import logging -from typing import List, cast +from typing import List, cast, Optional from mgr_module import HandleCommandResult from ceph.deployment.service_spec import IscsiServiceSpec @@ -121,7 +121,10 @@ class IscsiService(CephService): get_set_cmd_dicts=get_set_cmd_dicts ) - def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult: + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: # if only 1 iscsi, alert user (this is not passable with --force) warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Iscsi', 1, True) if warn: diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index aa18b04e13af1..a95a515247e6c 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -1,7 +1,7 @@ import errno import logging import os -from typing import List, Any, Tuple, Dict, cast +from typing import List, Any, Tuple, Dict, Optional, cast from mgr_module import HandleCommandResult @@ -84,7 +84,10 @@ class GrafanaService(CephadmService): service_url ) - def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult: + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Grafana', 1) if warn and not force: return HandleCommandResult(-errno.EBUSY, '', warn_message) @@ -175,7 +178,10 @@ class AlertmanagerService(CephadmService): service_url ) - def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult: + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Alertmanager', 1) if warn and not force: return HandleCommandResult(-errno.EBUSY, '', warn_message) @@ -280,7 +286,10 @@ class PrometheusService(CephadmService): service_url ) - def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult: + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Prometheus', 1) if warn and not force: return HandleCommandResult(-errno.EBUSY, '', warn_message) @@ -299,7 +308,10 @@ class NodeExporterService(CephadmService): assert self.TYPE == daemon_spec.daemon_type return {}, [] - def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult: + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: # since node exporter runs on each host and cannot compromise data, no extra checks required names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids] out = f'It is presumed safe to stop {names}' diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py index 617d18798f426..760132b01212a 100644 --- a/src/pybind/mgr/cephadm/services/nfs.py +++ b/src/pybind/mgr/cephadm/services/nfs.py @@ -1,6 +1,6 @@ import errno import logging -from typing import Dict, Tuple, Any, List, cast +from typing import Dict, Tuple, Any, List, cast, Optional from mgr_module import HandleCommandResult @@ -156,7 +156,10 @@ class NFSService(CephService): super().post_remove(daemon) self.remove_rgw_keyring(daemon) - def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult: + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: # if only 1 nfs, alert user (this is not passable with --force) warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'NFS', 1, True) if warn: diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index fe32de328b6c4..189788cb28e49 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -213,7 +213,8 @@ class CephadmUpgrade: return True return False - def _wait_for_ok_to_stop(self, s: DaemonDescription) -> bool: + def _wait_for_ok_to_stop(self, s: DaemonDescription, + known: Optional[List[str]] = None) -> bool: # only wait a little bit; the service might go away for something assert s.daemon_type is not None assert s.daemon_id is not None