mgr/cephadm: add ok-to-stop functions for ceph client services

author Daniel-Pivonka <dpivonka@redhat.com>

Thu, 14 Jan 2021 22:18:43 +0000 (17:18 -0500)

committer Sebastian Wagner <sebastian.wagner@suse.com>

Tue, 9 Mar 2021 14:29:32 +0000 (15:29 +0100)
author Daniel-Pivonka <dpivonka@redhat.com>
Thu, 14 Jan 2021 22:18:43 +0000 (17:18 -0500)
committer Sebastian Wagner <sebastian.wagner@suse.com>
Tue, 9 Mar 2021 14:29:32 +0000 (15:29 +0100)
diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py

index 5c4d8f767c0b4b6ae8c3a9c8d6f03e6cafe8a5c5..7edef7282812cf50f605c5048d78ef1524692252 100644 (file)
--- a/src/pybind/mgr/cephadm/inventory.py
+++ b/src/pybind/mgr/cephadm/inventory.py
@@ -9,7 +9,7 @@ import orchestrator
  from ceph.deployment import inventory
  from ceph.deployment.service_spec import ServiceSpec
  from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
-from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent
+from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types
  
  if TYPE_CHECKING:
      from .module import CephadmOrchestrator
@@ -545,7 +545,7 @@ class HostCache():
          result = []   # type: List[orchestrator.DaemonDescription]
          for host, dm in self.daemons.items():
              for name, d in dm.items():
-                if d.daemon_type == service_type:
+                if d.daemon_type in service_to_daemon_types(service_type):
                      result.append(d)
          return result
  
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py

index a481f70299fa1177e55e9eaa46a25fde728c2c62..8538a875651d0f3af7123ac2c04ee57fe3041321 100644 (file)
--- a/src/pybind/mgr/cephadm/services/cephadmservice.py
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -830,6 +830,36 @@ class RgwService(CephService):
                  raise OrchestratorError(err)
              self.mgr.log.info('updated period')
  
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # if load balancer (ha-rgw) is present: block if only 1 daemon is up, otherwise ok
+        # if no load balancer: block if only 1 daemon, warn (passable with --force) if > 1 daemon
+        def ha_rgw_present() -> bool:
+            running_ha_rgw_daemons = [
+                daemon for daemon in self.mgr.cache.get_daemons_by_type('ha-rgw') if daemon.status == 1]
+            running_haproxy_daemons = [
+                daemon for daemon in running_ha_rgw_daemons if daemon.daemon_type == 'haproxy']
+            running_keepalived_daemons = [
+                daemon for daemon in running_ha_rgw_daemons if daemon.daemon_type == 'keepalived']
+            # check that there is at least one haproxy and keepalived daemon running
+            if running_haproxy_daemons and running_keepalived_daemons:
+                return True
+            return False
+
+        # if only 1 rgw, alert user (this is not passable with --force)
+        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'RGW', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        # if reached here, there is > 1 rgw daemon.
+        # Say okay if load balancer present or force flag set
+        if ha_rgw_present() or force:
+            return HandleCommandResult(0, warn_message, '')
+
+        # if reached here, > 1 RGW daemon, no load balancer and no force flag.
+        # Provide warning
+        warn_message = "WARNING: Removing RGW daemons can cause clients to lose connectivity. "
+        return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
  
  class RbdMirrorService(CephService):
      TYPE = 'rbd-mirror'
@@ -851,6 +881,14 @@ class RbdMirrorService(CephService):
  
          return daemon_spec
  
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # if only 1 rbd-mirror, alert user (this is not passable with --force)
+        warn, warn_message = self._enough_daemons_to_stop(
+            self.TYPE, daemon_ids, 'Rbdmirror', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+        return HandleCommandResult(0, warn_message, '')
+
  
  class CrashService(CephService):
      TYPE = 'crash'
diff --git a/src/pybind/mgr/cephadm/services/iscsi.py b/src/pybind/mgr/cephadm/services/iscsi.py

index 94e7741f8841b70a9cf813aaa7026b4970a11c05..84c730647940855b9c2161c8d58b6fe82abf0a31 100644 (file)
--- a/src/pybind/mgr/cephadm/services/iscsi.py
+++ b/src/pybind/mgr/cephadm/services/iscsi.py
@@ -1,10 +1,12 @@
+import errno
  import json
  import logging
  from typing import List, cast
  
+from mgr_module import HandleCommandResult
  from ceph.deployment.service_spec import IscsiServiceSpec
  
-from orchestrator import DaemonDescription
+from orchestrator import DaemonDescription, DaemonDescriptionStatus
  from .cephadmservice import CephadmDaemonDeploySpec, CephService
  from .. import utils
  
@@ -118,3 +120,21 @@ class IscsiService(CephService):
              get_cmd='dashboard iscsi-gateway-list',
              get_set_cmd_dicts=get_set_cmd_dicts
          )
+
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # if only 1 iscsi, alert user (this is not passable with --force)
+        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Iscsi', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        # if reached here, there is > 1 iscsi daemon. make sure none are down
+        warn_message = (
+            'ALERT: 1 iscsi daemon is already down. Please bring it back up before stopping this one')
+        iscsi_daemons = self.mgr.cache.get_daemons_by_type(self.TYPE)
+        for i in iscsi_daemons:
+            if i.status != DaemonDescriptionStatus.running:
+                return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids]
+        warn_message = f'It is presumed safe to stop {names}'
+        return HandleCommandResult(0, warn_message, '')
diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py

index b82c6e91277186f7ff324e8076735887e3a6c56c..617d18798f426cd239c946bb68d14ac672be6e93 100644 (file)
--- a/src/pybind/mgr/cephadm/services/nfs.py
+++ b/src/pybind/mgr/cephadm/services/nfs.py
@@ -1,6 +1,9 @@
+import errno
  import logging
  from typing import Dict, Tuple, Any, List, cast
  
+from mgr_module import HandleCommandResult
+
  from ceph.deployment.service_spec import NFSServiceSpec
  import rados
  
@@ -152,3 +155,18 @@ class NFSService(CephService):
      def post_remove(self, daemon: DaemonDescription) -> None:
          super().post_remove(daemon)
          self.remove_rgw_keyring(daemon)
+
+    def ok_to_stop(self, daemon_ids: List[str], force: bool = False) -> HandleCommandResult:
+        # if only 1 nfs, alert user (this is not passable with --force)
+        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'NFS', 1, True)
+        if warn:
+            return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+        # if reached here, there is > 1 nfs daemon.
+        if force:
+            return HandleCommandResult(0, warn_message, '')
+
+        # if reached here, > 1 nfs daemon and no force flag.
+        # Provide warning
+        warn_message = "WARNING: Removing NFS daemons can cause clients to lose connectivity. "
+        return HandleCommandResult(-errno.EBUSY, '', warn_message)
author	Daniel-Pivonka <dpivonka@redhat.com>
	Thu, 14 Jan 2021 22:18:43 +0000 (17:18 -0500)
committer	Sebastian Wagner <sebastian.wagner@suse.com>
	Tue, 9 Mar 2021 14:29:32 +0000 (15:29 +0100)
src/pybind/mgr/cephadm/inventory.py		patch \| blob \| history
src/pybind/mgr/cephadm/services/cephadmservice.py		patch \| blob \| history
src/pybind/mgr/cephadm/services/iscsi.py		patch \| blob \| history
src/pybind/mgr/cephadm/services/nfs.py		patch \| blob \| history