From: Joshua Blanch Date: Fri, 1 Nov 2024 21:19:42 +0000 (+0000) Subject: mgr/cephadm: Retry command when SSH connection closes X-Git-Tag: v20.0.0~333^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f83c88077bf17431b288d2a09d28218e506f72b4;p=ceph.git mgr/cephadm: Retry command when SSH connection closes Handle scenarios where the SSH connection is closed or broken. Ensure commands like exiting maintenance attempt to reconnect rather than throwing an error to the user. Fixes: https://tracker.ceph.com/issues/67905 Signed-off-by: Joshua Blanch --- diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 4a7959ae0450..5fc26b10fa86 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -1696,7 +1696,12 @@ class CephadmServe: self.log.debug('stdin: %s' % stdin) cmd = ssh.RemoteCommand(WHICH, ['python3']) - python = await self.mgr.ssh._check_execute_command(host, cmd, addr=addr) + try: + # when connection was broken/closed, retrying resets the connection + python = await self.mgr.ssh._check_execute_command(host, cmd, addr=addr) + except ssh.HostConnectionError: + python = await self.mgr.ssh._check_execute_command(host, cmd, addr=addr) + # N.B. because the python3 executable is based on the results of the # which command we can not know it ahead of time and must be converted # into a RemoteExecutable.