mgr/cephadm: Make daemon actions asynchronous
author     Sebastian Wagner <sebastian.wagner@suse.com>
           Fri, 21 Aug 2020 14:25:31 +0000 (16:25 +0200)
committer  Sebastian Wagner <sebastian.wagner@suse.com>
           Fri, 28 Aug 2020 07:54:41 +0000 (09:54 +0200)
Fixes broken `ceph orch redeploy <current active mgr>`

Signed-off-by: Sebastian Wagner <sebastian.wagner@suse.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/services/cephadmservice.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/upgrade.py

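Previously `daemon_action()` called `_daemon_action()` synchronously, so a `redeploy` of the currently active mgr tried to recreate the very daemon that was servicing the command. This commit makes the action asynchronous: the request is validated, recorded in the daemon cache, and the serve loop carries it out later, failing over to a standby mgr first when the target is the active mgr itself. Below is a minimal, self-contained sketch of that schedule-first flow; the class and attribute names are illustrative, not the real cephadm API:

    from typing import Dict

    class OrchestratorSketch:
        """Illustrative only -- not the real CephadmOrchestrator."""

        def __init__(self, my_mgr_id: str, standby_mgrs: int) -> None:
            self.my_mgr_id = my_mgr_id
            self.standby_mgrs = standby_mgrs
            self.scheduled: Dict[str, str] = {}  # daemon name -> pending action

        def daemon_is_self(self, daemon_type: str, daemon_id: str) -> bool:
            # True when the target daemon is the mgr we are currently running in.
            return daemon_type == 'mgr' and daemon_id == self.my_mgr_id

        def daemon_action(self, action: str, daemon_type: str,
                          daemon_id: str, host: str) -> str:
            name = f'{daemon_type}.{daemon_id}'
            if action == 'redeploy' and self.daemon_is_self(daemon_type, daemon_id) \
                    and self.standby_mgrs == 0:
                # Redeploying ourselves requires a standby mgr to fail over to.
                raise RuntimeError(f'Unable to schedule redeploy for {name}: No standby MGRs')
            # Record the action instead of executing it inline; the serve loop
            # (running after this command has returned) will carry it out.
            self.scheduled[name] = action
            return f"Scheduled to {action} {name} on host '{host}'"

    orch = OrchestratorSketch(my_mgr_id='x', standby_mgrs=1)
    print(orch.daemon_action('redeploy', 'mgr', 'x', 'test'))
    # -> Scheduled to redeploy mgr.x on host 'test'
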
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index ef2d7997cafa06778a17c8e7625649d8955fed5b..0a6ef8d0cb481bb75ad93eee783c10ebbe099f43 100644
@@ -1619,23 +1619,12 @@ To check that the host is reachable:
             daemon_type=daemon_type,
         )
 
-        if image is not None:
-            if action != 'redeploy':
-                raise OrchestratorError(
-                    f'Cannot execute {action} with new image. `action` needs to be `redeploy`')
-            if daemon_type not in CEPH_TYPES:
-                raise OrchestratorError(
-                    f'Cannot redeploy {daemon_type}.{daemon_id} with a new image: Supported '
-                    f'types are: {", ".join(CEPH_TYPES)}')
-
-            self.check_mon_command({
-                'prefix': 'config set',
-                'name': 'container_image',
-                'value': image,
-                'who': utils.name_to_config_section(daemon_type + '.' + daemon_id),
-            })
+        self._daemon_action_set_image(action, image, daemon_type, daemon_id)
 
         if action == 'redeploy':
+            if self.daemon_is_self(daemon_type, daemon_id):
+                self.mgr_service.fail_over()
+                return # unreachable.
             # stop, recreate the container+unit, then restart
             return self._create_daemon(daemon_spec)
         elif action == 'reconfig':
@@ -1659,13 +1648,50 @@ To check that the host is reachable:
         self.events.for_daemon(name, 'INFO', msg)
         return msg
 
+    def _daemon_action_set_image(self, action: str, image: Optional[str], daemon_type: str, daemon_id: str):
+        if image is not None:
+            if action != 'redeploy':
+                raise OrchestratorError(
+                    f'Cannot execute {action} with new image. `action` needs to be `redeploy`')
+            if daemon_type not in CEPH_TYPES:
+                raise OrchestratorError(
+                    f'Cannot redeploy {daemon_type}.{daemon_id} with a new image: Supported '
+                    f'types are: {", ".join(CEPH_TYPES)}')
+
+            self.check_mon_command({
+                'prefix': 'config set',
+                'name': 'container_image',
+                'value': image,
+                'who': utils.name_to_config_section(daemon_type + '.' + daemon_id),
+            })
+
     @trivial_completion
     def daemon_action(self, action: str, daemon_name: str, image: Optional[str]=None) -> str:
         d = self.cache.get_daemon(daemon_name)
 
-        self.log.info(f'{action} daemon {daemon_name}')
-        return self._daemon_action(d.daemon_type, d.daemon_id,
-                                 d.hostname, action, image=image)
+        if action == 'redeploy' and self.daemon_is_self(d.daemon_type, d.daemon_id) \
+                and not self.mgr_service.mgr_map_has_standby():
+            raise OrchestratorError(
+                f'Unable to schedule redeploy for {daemon_name}: No standby MGRs')
+
+        self._daemon_action_set_image(action, image, d.daemon_type, d.daemon_id)
+
+        self.log.info(f'Schedule {action} daemon {daemon_name}')
+        return self._schedule_daemon_action(daemon_name, action)
+
+    def daemon_is_self(self, daemon_type: str, daemon_id: str) -> bool:
+        return daemon_type == 'mgr' and daemon_id == self.get_mgr_id()
+
+    def _schedule_daemon_action(self, daemon_name: str, action: str):
+        dd = self.cache.get_daemon(daemon_name)
+        if action == 'redeploy' and self.daemon_is_self(dd.daemon_type, dd.daemon_id) \
+                and not self.mgr_service.mgr_map_has_standby():
+            raise OrchestratorError(
+                f'Unable to schedule redeploy for {daemon_name}: No standby MGRs')
+        self.cache.schedule_daemon_action(dd.hostname, dd.name(), action)
+        msg = "Scheduled to {} {} on host '{}'".format(action, daemon_name, dd.hostname)
+        self._kick_serve_loop()
+        return msg
 
     @trivial_completion
     def remove_daemons(self, names):
@@ -2154,30 +2180,34 @@ To check that the host is reachable:
                 dd.hostname, dd.name())
             if last_deps is None:
                 last_deps = []
-            reconfig = False
+            action = self.cache.get_scheduled_daemon_action(dd.hostname, dd.name())
             if not last_config:
                 self.log.info('Reconfiguring %s (unknown last config time)...'% (
                     dd.name()))
-                reconfig = True
+                action = 'reconfig'
             elif last_deps != deps:
                 self.log.debug('%s deps %s -> %s' % (dd.name(), last_deps,
                                                      deps))
                 self.log.info('Reconfiguring %s (dependencies changed)...' % (
                     dd.name()))
-                reconfig = True
+                action = 'reconfig'
             elif self.last_monmap and \
                     self.last_monmap > last_config and \
                dd.daemon_type in CEPH_TYPES:
                 self.log.info('Reconfiguring %s (monmap changed)...' % dd.name())
-                reconfig = True
-            if reconfig:
+                action = 'reconfig'
+            if action:
+                if self.cache.get_scheduled_daemon_action(dd.hostname, dd.name()) == 'redeploy' \
+                        and action == 'reconfig':
+                    action = 'redeploy'
                 try:
-                    self._create_daemon(
-                        CephadmDaemonSpec(
-                            host=dd.hostname,
-                            daemon_id=dd.daemon_id,
-                            daemon_type=dd.daemon_type),
-                        reconfig=True)
+                    self._daemon_action(
+                        daemon_type=dd.daemon_type,
+                        daemon_id=dd.daemon_id,
+                        host=dd.hostname,
+                        action=action
+                    )
+                    self.cache.rm_scheduled_daemon_action(dd.hostname, dd.name())
                 except OrchestratorError as e:
                     self.events.from_orch_error(e)
                     if dd.daemon_type in daemons_post:
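In `_check_daemons()` the boolean `reconfig` flag becomes an `action` string seeded from the scheduled-action cache: a missing config time, changed dependencies, or a newer monmap maps to 'reconfig', but a scheduled 'redeploy' is never downgraded to a mere 'reconfig'. A small standalone sketch of that precedence logic (the function name is made up for illustration):

    from typing import Optional

    def effective_action(scheduled: Optional[str],
                         needs_reconfig: bool) -> Optional[str]:
        # Start from whatever action was scheduled, possibly bump it to
        # 'reconfig' when config/deps/monmap changed, and keep a scheduled
        # 'redeploy' from being weakened to 'reconfig'.
        action = scheduled
        if needs_reconfig:
            action = 'reconfig'
        if scheduled == 'redeploy' and action == 'reconfig':
            action = 'redeploy'
        return action

    assert effective_action(None, True) == 'reconfig'
    assert effective_action('redeploy', True) == 'redeploy'
    assert effective_action('restart', False) == 'restart'
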
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
index 304e4a10a00d2246e2ba263f2fb987129df26499..52b08a21afaeb009741de0346eccc934ba688072 100644
@@ -355,17 +355,14 @@ class MgrService(CephadmService):
         return self.mgr._create_daemon(daemon_spec)
 
     def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
-        active_mgr_str = self.mgr.get('mgr_map')['active_name']
         for daemon in daemon_descrs:
-            if daemon.daemon_id == active_mgr_str:
+            if self.mgr.daemon_is_self(daemon.daemon_type, daemon.daemon_id):
                 return daemon
         # if no active mgr found, return empty Daemon Desc
         return DaemonDescription()
 
     def fail_over(self):
-        mgr_map = self.mgr.get('mgr_map')
-        num = len(mgr_map.get('standbys'))
-        if not num:
+        if not self.mgr_map_has_standby():
             raise OrchestratorError('Need standby mgr daemon', event_kind_subject=(
                 'daemon', 'mgr' + self.mgr.get_mgr_id()))
 
@@ -379,6 +376,15 @@ class MgrService(CephadmService):
             'who': self.mgr.get_mgr_id(),
         })
 
+    def mgr_map_has_standby(self) -> bool:
+        """
+        This is a bit safer than asking our inventory. If the mgr joined the mgr map,
+        we know it joined the cluster
+        """
+        mgr_map = self.mgr.get('mgr_map')
+        num = len(mgr_map.get('standbys'))
+        return bool(num)
+
 
 class MdsService(CephadmService):
     TYPE = 'mds'
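`fail_over()` now delegates its guard to `mgr_map_has_standby()`, which inspects the mgr map rather than cephadm's own inventory: a mgr that appears in the map has demonstrably joined the cluster. A rough sketch of the same check against a mgr-map-shaped dict (the sample map content is made up):

    from typing import Any, Dict, List

    def mgr_map_has_standby(mgr_map: Dict[str, Any]) -> bool:
        # A standby listed in the mgr map has already joined the cluster,
        # so it is safe to fail over to it.
        standbys: List[Any] = mgr_map.get('standbys', [])
        return bool(len(standbys))

    sample_map = {'active_name': 'x', 'standbys': [{'name': 'y'}]}
    assert mgr_map_has_standby(sample_map)
    assert not mgr_map_has_standby({'active_name': 'x', 'standbys': []})
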
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index fcff289609bd8ede4983cac14180f2f4b2204436..5d53a5f13c4e15eb7a2fcaf7cc722f2cf1d26c7b 100644
@@ -184,11 +184,13 @@ class TestCephadm(object):
             with with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), CephadmOrchestrator.add_rgw, 'test') as daemon_id:
 
                 c = cephadm_module.daemon_action('redeploy', 'rgw.' + daemon_id)
-                assert wait(cephadm_module, c) == f"Deployed rgw.{daemon_id} on host 'test'"
+                assert wait(cephadm_module,
+                            c) == f"Scheduled to redeploy rgw.{daemon_id} on host 'test'"
 
                 for what in ('start', 'stop', 'restart'):
                     c = cephadm_module.daemon_action(what, 'rgw.' + daemon_id)
-                    assert wait(cephadm_module, c) == what + f" rgw.{daemon_id} from host 'test'"
+                    assert wait(cephadm_module,
+                                c) == F"Scheduled to {what} rgw.{daemon_id} on host 'test'"
 
                 # Make sure, _check_daemons does a redeploy due to monmap change:
                 cephadm_module._store['_ceph_get/mon_map'] = {
diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py
index 89cc39038d809f48fcd0d3506831a99bb7d496b8..eb1929bc96110ee019df79419eaf558c9b7d392f 100644
@@ -261,8 +261,7 @@ class CephadmUpgrade:
                     daemon_type, d.daemon_id,
                     d.container_image_name, d.container_image_id, d.version))
 
-                if daemon_type == 'mgr' and \
-                   d.daemon_id == self.mgr.get_mgr_id():
+                if self.mgr.daemon_is_self(d.daemon_type, d.daemon_id):
                     logger.info('Upgrade: Need to upgrade myself (mgr.%s)' %
                                 self.mgr.get_mgr_id())
                     need_upgrade_self = True
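The upgrade loop reuses the same `daemon_is_self()` helper to notice that the daemon about to be upgraded is the active mgr itself and must hand off first. A trivial sketch of that check over a list of (type, id) pairs; the data is illustrative:

    def daemon_is_self(my_mgr_id: str, daemon_type: str, daemon_id: str) -> bool:
        # The only daemon that can be "us" is the mgr we are running inside of.
        return daemon_type == 'mgr' and daemon_id == my_mgr_id

    daemons = [('osd', '3'), ('mgr', 'y'), ('mgr', 'x')]
    need_upgrade_self = any(daemon_is_self('x', t, i) for t, i in daemons)
    assert need_upgrade_self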