From cd56b910217f092598b121209f1a2db7f21aabcb Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@redhat.com>
Date: Tue, 3 Mar 2020 12:27:51 -0600
Subject: [PATCH] mgr/cephadm: make _do_upgrade work synchronously

It mostly was, but it was futzing with completions when it didn't need to.

Signed-off-by: Sage Weil <sage@redhat.com>
---
 src/pybind/mgr/cephadm/module.py | 47 ++++++++++++++------------------
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index ed9b3796831..35b7fbaacd2 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -728,10 +728,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                     ev_progress=progress)
 
     def _do_upgrade(self):
-        # type: () -> Optional[AsyncCompletion]
+        # type: () -> None
         if not self.upgrade_state:
             self.log.debug('_do_upgrade no state, exiting')
-            return None
+            return
 
         target_name = self.upgrade_state.get('target_name')
         target_id = self.upgrade_state.get('target_id', None)
@@ -747,7 +747,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                     'count': 1,
                     'detail': [str(e)],
                 })
-                return None
+                return
             self.upgrade_state['target_id'] = target_id
             self.upgrade_state['target_version'] = target_version
             self._save_upgrade_state()
@@ -809,13 +809,13 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                                 'failed to pull %s on host %s' % (target_name,
                                                                   d.hostname)],
                         })
-                        return None
+                        return
                     r = json.loads(''.join(out))
                     if r.get('image_id') != target_id:
                         self.log.info('Upgrade: image %s pull on %s got new image %s (not %s), restarting' % (target_name, d.hostname, r['image_id'], target_id))
                         self.upgrade_state['target_id'] = r['image_id']
                         self._save_upgrade_state()
-                        return None
+                        return
 
                 self._update_upgrade_progress(done / len(daemons))
 
@@ -824,7 +824,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                         self.log.debug('daemon %s is stopped but has correct image name' % (d.name()))
                         continue
                 if not self._wait_for_ok_to_stop(d):
-                    return None
+                    return
                 self.log.info('Upgrade: Redeploying %s.%s' %
                               (d.daemon_type, d.daemon_id))
                 ret, out, err = self.mon_command({
@@ -833,12 +833,13 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                     'value': target_name,
                     'who': daemon_type + '.' + d.daemon_id,
                 })
-                return self._daemon_action([(
+                self._daemon_action(
                     d.daemon_type,
                     d.daemon_id,
                     d.hostname,
                     'redeploy'
-                )])
+                )
+                return
 
             if need_upgrade_self:
                 mgr_map = self.get('mgr_map')
@@ -853,7 +854,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                             'but it needs at least one standby to proceed.',
                         ],
                     })
-                    return None
+                    return
 
                 self.log.info('Upgrade: there are %d other already-upgraded '
                               'standby mgrs, failing over' % num)
@@ -865,7 +866,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                     'prefix': 'mgr fail',
                     'who': self.get_mgr_id(),
                 })
-                return None
+                return
             elif daemon_type == 'mgr':
                 if 'UPGRADE_NO_STANDBY_MGR' in self.health_checks:
                     del self.health_checks['UPGRADE_NO_STANDBY_MGR']
@@ -929,7 +930,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                         self.upgrade_state['progress_id'])
         self.upgrade_state = None
         self._save_upgrade_state()
-        return None
+        return
 
     def _check_hosts(self):
         self.log.debug('_check_hosts')
@@ -1058,19 +1059,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
             self._refresh_configs()
 
             if self.upgrade_state and not self.upgrade_state.get('paused'):
-                upgrade_completion = self._do_upgrade()
-                if upgrade_completion:
-                    while not upgrade_completion.has_result:
-                        self.process([upgrade_completion])
-                        if upgrade_completion.needs_result:
-                            time.sleep(1)
-                        else:
-                            break
-                    if upgrade_completion.exception is not None:
-                        self.log.error(str(upgrade_completion.exception))
-                self.log.debug('did _do_upgrade')
-            else:
-                self._serve_sleep()
+                self._do_upgrade()
+                continue
+
+            self._serve_sleep()
         self.log.debug("serve exit")
 
     def config_notify(self):
@@ -1744,9 +1736,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
             raise orchestrator.OrchestratorError(
                 'Unable to find %s.%s.* daemon(s)' % (service_name))
         self.log.info('%s service %s' % (action, service_name))
-        return self._daemon_action(args)
+        return self._daemon_actions(args)
 
     @async_map_completion
+    def _daemon_actions(self, daemon_type, daemon_id, host, action):
+        return self._daemon_action(daemon_type, daemon_id, host, action)
+
     def _daemon_action(self, daemon_type, daemon_id, host, action):
         if action == 'redeploy':
             # stop, recreate the container+unit, then restart
@@ -1782,7 +1777,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                     daemon_type, daemon_id))
         self.log.info('%s daemons %s' % (action,
                                          ['%s.%s' % (a[0], a[1]) for a in args]))
-        return self._daemon_action(args)
+        return self._daemon_actions(args)
 
     def remove_daemons(self, names, force):
         # type: (List[str], bool) -> orchestrator.Completion
-- 
2.39.5