From: Adam King
Date: Fri, 25 Mar 2022 03:21:47 +0000 (-0400)
Subject: mgr/cephadm: add keep-alive requests to ssh connections
X-Git-Tag: v16.2.8~28^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=03d94383136d9042a41adcf721c7831fcca33d0f;p=ceph.git

mgr/cephadm: add keep-alive requests to ssh connections

Fixes: https://tracker.ceph.com/issues/51733

Signed-off-by: Adam King
---

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index d8801dba8438..09514db422f2 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -687,10 +687,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             ssh_options += ['-i', tkey.name]
 
         self._temp_files = temp_files
-        if ssh_options:
-            self._ssh_options = ' '.join(ssh_options) # type: Optional[str]
-        else:
-            self._ssh_options = None
+        ssh_options += ['-o', 'ServerAliveInterval=7', '-o', 'ServerAliveCountMax=3']
+        self._ssh_options = ' '.join(ssh_options) # type: Optional[str]
 
         if self.mode == 'root':
             self.ssh_user = self.get_store('ssh_user', default='root')
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
index 939b50bd30b9..9112fdbd9794 100644
--- a/src/pybind/mgr/cephadm/serve.py
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -1265,7 +1265,15 @@ class CephadmServe:
             if stdin:
                 self.log.debug('stdin: %s' % stdin)
 
-            python = connr.choose_python()
+            try:
+                # if host has gone offline this is likely where we'll fail first
+                python = connr.choose_python()
+            except RuntimeError as e:
+                self.mgr.offline_hosts.add(host)
+                self.mgr._reset_con(host)
+                if error_ok:
+                    return [], [str(e)], 1
+                raise
             if not python:
                 raise RuntimeError(
                     'unable to find python on %s (tried %s in %s)' % (