From 4d467515da992bc3fe6c254740ae6b44d66282f8 Mon Sep 17 00:00:00 2001
From: Daniel-Pivonka <dpivonka@redhat.com>
Date: Thu, 14 Jan 2021 17:18:43 -0500
Subject: [PATCH] mgr/cephadm: add ok-to-stop functions for ceph client
 services

Signed-off-by: Daniel-Pivonka <dpivonka@redhat.com>
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit 674912bfed92537a97e625bb79397bf97f10b24b)
---
 src/pybind/mgr/cephadm/inventory.py           |  4 +-
 .../mgr/cephadm/services/cephadmservice.py    | 38 +++++++++++++++++++
 src/pybind/mgr/cephadm/services/iscsi.py      | 22 ++++++++++-
 src/pybind/mgr/cephadm/services/nfs.py        | 18 +++++++++
 4 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py
index 5c4d8f767c0b4..7edef7282812c 100644
--- a/src/pybind/mgr/cephadm/inventory.py
+++ b/src/pybind/mgr/cephadm/inventory.py
@@ -9,7 +9,7 @@ import orchestrator
 from ceph.deployment import inventory
 from ceph.deployment.service_spec import ServiceSpec
 from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
-from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent
+from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types
 
 if TYPE_CHECKING:
     from .module import CephadmOrchestrator
@@ -545,7 +545,7 @@ class HostCache():
         result = []   # type: List[orchestrator.DaemonDescription]
         for host, dm in self.daemons.items():
             for name, d in dm.items():
-                if d.daemon_type == service_type:
+                if d.daemon_type in service_to_daemon_types(service_type):
                     result.append(d)
         return result
 
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
index a481f70299fa1..8538a875651d0 100644
--- a/src/pybind/mgr/cephadm/services/cephadmservice.py
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -830,6 +830,36 @@ class RgwService(CephService):
                 raise OrchestratorError(err)
             self.mgr.log.info('updated period')
 
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # With a load balancer (ha-rgw) present: block if only 1 RGW daemon is up, otherwise OK.
+        # Without one: block if only 1 daemon; with > 1, refuse with a warning unless forced.
+        def ha_rgw_present() -> bool:
+            running_ha_rgw_daemons = [
+                daemon for daemon in self.mgr.cache.get_daemons_by_type('ha-rgw') if daemon.status == 1]
+            running_haproxy_daemons = [
+                daemon for daemon in running_ha_rgw_daemons if daemon.daemon_type == 'haproxy']
+            running_keepalived_daemons = [
+                daemon for daemon in running_ha_rgw_daemons if daemon.daemon_type == 'keepalived']
+            # require at least one haproxy and one keepalived daemon among those with status == 1
+            if running_haproxy_daemons and running_keepalived_daemons:
+                return True
+            return False
+
+        # if only 1 RGW daemon, block with EBUSY; this check runs before `force` is consulted
+        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'RGW', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        # if reached here, there is > 1 RGW daemon.
+        # Report OK if a load balancer is present or the force flag is set.
+        if ha_rgw_present() or force:
+            return HandleCommandResult(0, warn_message, '')
+
+        # if reached here: > 1 RGW daemon, no load balancer and no force flag.
+        # Refuse with EBUSY and a connectivity warning.
+        warn_message = "WARNING: Removing RGW daemons can cause clients to lose connectivity. "
+        return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
 
 class RbdMirrorService(CephService):
     TYPE = 'rbd-mirror'
@@ -851,6 +881,14 @@ class RbdMirrorService(CephService):
 
         return daemon_spec
 
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # if only 1 rbd-mirror daemon, block with EBUSY; `force` is never consulted in this method
+        warn, warn_message = self._enough_daemons_to_stop(
+            self.TYPE, daemon_ids, 'Rbdmirror', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+        return HandleCommandResult(0, warn_message, '')
+
 
 class CrashService(CephService):
     TYPE = 'crash'
diff --git a/src/pybind/mgr/cephadm/services/iscsi.py b/src/pybind/mgr/cephadm/services/iscsi.py
index 94e7741f8841b..84c7306479408 100644
--- a/src/pybind/mgr/cephadm/services/iscsi.py
+++ b/src/pybind/mgr/cephadm/services/iscsi.py
@@ -1,10 +1,12 @@
+import errno
 import json
 import logging
 from typing import List, cast
 
+from mgr_module import HandleCommandResult
 from ceph.deployment.service_spec import IscsiServiceSpec
 
-from orchestrator import DaemonDescription
+from orchestrator import DaemonDescription, DaemonDescriptionStatus
 from .cephadmservice import CephadmDaemonDeploySpec, CephService
 from .. import utils
 
@@ -118,3 +120,21 @@ class IscsiService(CephService):
             get_cmd='dashboard iscsi-gateway-list',
             get_set_cmd_dicts=get_set_cmd_dicts
         )
+
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # if only 1 iscsi daemon, block with EBUSY; `force` is never consulted in this method
+        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Iscsi', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        # if reached here, there is > 1 iscsi daemon. Block if any cached iscsi daemon is not running.
+        warn_message = (
+            'ALERT: 1 iscsi daemon is already down. Please bring it back up before stopping this one')
+        iscsi_daemons = self.mgr.cache.get_daemons_by_type(self.TYPE)
+        for i in iscsi_daemons:
+            if i.status != DaemonDescriptionStatus.running:
+                return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids]
+        warn_message = f'It is presumed safe to stop {names}'
+        return HandleCommandResult(0, warn_message, '')
diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py
index b82c6e9127718..617d18798f426 100644
--- a/src/pybind/mgr/cephadm/services/nfs.py
+++ b/src/pybind/mgr/cephadm/services/nfs.py
@@ -1,6 +1,9 @@
+import errno
 import logging
 from typing import Dict, Tuple, Any, List, cast
 
+from mgr_module import HandleCommandResult
+
 from ceph.deployment.service_spec import NFSServiceSpec
 import rados
 
@@ -152,3 +155,18 @@ class NFSService(CephService):
     def post_remove(self, daemon: DaemonDescription) -> None:
         super().post_remove(daemon)
         self.remove_rgw_keyring(daemon)
+
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # if only 1 NFS daemon, block with EBUSY; this check runs before `force` is consulted
+        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'NFS', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        # if reached here, there is > 1 NFS daemon; report OK only when forced.
+        if force:
+            return HandleCommandResult(0, warn_message, '')
+
+        # if reached here: > 1 NFS daemon and no force flag.
+        # Refuse with EBUSY and a connectivity warning.
+        warn_message = "WARNING: Removing NFS daemons can cause clients to lose connectivity. "
+        return HandleCommandResult(-errno.EBUSY, '', warn_message)
-- 
2.39.5