From 4d467515da992bc3fe6c254740ae6b44d66282f8 Mon Sep 17 00:00:00 2001
From: Daniel-Pivonka
Date: Thu, 14 Jan 2021 17:18:43 -0500
Subject: [PATCH] mgr/cephadm: add ok-to-stop functions for ceph client services

Signed-off-by: Daniel-Pivonka
Signed-off-by: Adam King
(cherry picked from commit 674912bfed92537a97e625bb79397bf97f10b24b)
---
Note(review): this patch was recovered from a whitespace-collapsed copy.
Line breaks and indentation (especially of unchanged context lines) are
reconstructed from the hunk headers and Python syntax; confirm them
against the target tree before applying.

 src/pybind/mgr/cephadm/inventory.py           |  4 +-
 .../mgr/cephadm/services/cephadmservice.py    | 38 +++++++++++++++++++
 src/pybind/mgr/cephadm/services/iscsi.py      | 22 ++++++++++-
 src/pybind/mgr/cephadm/services/nfs.py        | 18 +++++++++
 4 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py
index 5c4d8f767c0b4..7edef7282812c 100644
--- a/src/pybind/mgr/cephadm/inventory.py
+++ b/src/pybind/mgr/cephadm/inventory.py
@@ -9,7 +9,7 @@ import orchestrator
 from ceph.deployment import inventory
 from ceph.deployment.service_spec import ServiceSpec
 from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
-from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent
+from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types
 
 if TYPE_CHECKING:
     from .module import CephadmOrchestrator
@@ -545,7 +545,7 @@ class HostCache():
         result = []   # type: List[orchestrator.DaemonDescription]
         for host, dm in self.daemons.items():
             for name, d in dm.items():
-                if d.daemon_type == service_type:
+                if d.daemon_type in service_to_daemon_types(service_type):
                     result.append(d)
         return result
 
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
index a481f70299fa1..8538a875651d0 100644
--- a/src/pybind/mgr/cephadm/services/cephadmservice.py
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -830,6 +830,36 @@ class RgwService(CephService):
                 raise OrchestratorError(err)
             self.mgr.log.info('updated period')
 
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # if load balancer (ha-rgw) is present block if only 1 daemon up otherwise ok
+        # if no load balancer, warn if > 1 daemon, block if only 1 daemon
+        def ha_rgw_present() -> bool:
+            running_ha_rgw_daemons = [
+                daemon for daemon in self.mgr.cache.get_daemons_by_type('ha-rgw') if daemon.status == 1]
+            running_haproxy_daemons = [
+                daemon for daemon in running_ha_rgw_daemons if daemon.daemon_type == 'haproxy']
+            running_keepalived_daemons = [
+                daemon for daemon in running_ha_rgw_daemons if daemon.daemon_type == 'keepalived']
+            # check that there is at least one haproxy and keepalived daemon running
+            if running_haproxy_daemons and running_keepalived_daemons:
+                return True
+            return False
+
+        # if only 1 rgw, alert user (this is not passable with --force)
+        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'RGW', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        # if reached here, there is > 1 rgw daemon.
+        # Say okay if load balancer present or force flag set
+        if ha_rgw_present() or force:
+            return HandleCommandResult(0, warn_message, '')
+
+        # if reached here, > 1 RGW daemon, no load balancer and no force flag.
+        # Provide warning
+        warn_message = "WARNING: Removing RGW daemons can cause clients to lose connectivity. "
+        return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
 
 class RbdMirrorService(CephService):
     TYPE = 'rbd-mirror'
@@ -851,6 +881,14 @@ class RbdMirrorService(CephService):
 
         return daemon_spec
 
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # if only 1 rbd-mirror, alert user (this is not passable with --force)
+        warn, warn_message = self._enough_daemons_to_stop(
+            self.TYPE, daemon_ids, 'Rbdmirror', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+        return HandleCommandResult(0, warn_message, '')
+
 
 class CrashService(CephService):
     TYPE = 'crash'
diff --git a/src/pybind/mgr/cephadm/services/iscsi.py b/src/pybind/mgr/cephadm/services/iscsi.py
index 94e7741f8841b..84c7306479408 100644
--- a/src/pybind/mgr/cephadm/services/iscsi.py
+++ b/src/pybind/mgr/cephadm/services/iscsi.py
@@ -1,10 +1,12 @@
+import errno
 import json
 import logging
 from typing import List, cast
 
+from mgr_module import HandleCommandResult
 from ceph.deployment.service_spec import IscsiServiceSpec
 
-from orchestrator import DaemonDescription
+from orchestrator import DaemonDescription, DaemonDescriptionStatus
 from .cephadmservice import CephadmDaemonDeploySpec, CephService
 from .. import utils
 
@@ -118,3 +120,21 @@ class IscsiService(CephService):
             get_cmd='dashboard iscsi-gateway-list',
             get_set_cmd_dicts=get_set_cmd_dicts
         )
+
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # if only 1 iscsi, alert user (this is not passable with --force)
+        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Iscsi', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        # if reached here, there is > 1 iscsi daemon. make sure none are down
+        warn_message = (
+            'ALERT: 1 iscsi daemon is already down. Please bring it back up before stopping this one')
+        iscsi_daemons = self.mgr.cache.get_daemons_by_type(self.TYPE)
+        for i in iscsi_daemons:
+            if i.status != DaemonDescriptionStatus.running:
+                return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids]
+        warn_message = f'It is presumed safe to stop {names}'
+        return HandleCommandResult(0, warn_message, '')
diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py
index b82c6e9127718..617d18798f426 100644
--- a/src/pybind/mgr/cephadm/services/nfs.py
+++ b/src/pybind/mgr/cephadm/services/nfs.py
@@ -1,6 +1,9 @@
+import errno
 import logging
 from typing import Dict, Tuple, Any, List, cast
 
+from mgr_module import HandleCommandResult
+
 from ceph.deployment.service_spec import NFSServiceSpec
 
 import rados
@@ -152,3 +155,18 @@ class NFSService(CephService):
     def post_remove(self, daemon: DaemonDescription) -> None:
         super().post_remove(daemon)
         self.remove_rgw_keyring(daemon)
+
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # if only 1 nfs, alert user (this is not passable with --force)
+        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'NFS', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        # if reached here, there is > 1 nfs daemon.
+        if force:
+            return HandleCommandResult(0, warn_message, '')
+
+        # if reached here, > 1 nfs daemon and no force flag.
+        # Provide warning
+        warn_message = "WARNING: Removing NFS daemons can cause clients to lose connectivity. "
+        return HandleCommandResult(-errno.EBUSY, '', warn_message)
-- 
2.39.5