git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: add keep-alive requests to ssh connections 45632/head
author Adam King <adking@redhat.com>
Fri, 25 Mar 2022 03:21:47 +0000 (23:21 -0400)
committer Adam King <adking@redhat.com>
Fri, 25 Mar 2022 03:21:47 +0000 (23:21 -0400)
Fixes: https://tracker.ceph.com/issues/51733
Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py

index d8801dba8438c2b09490364bff37ab66138aaa1f..09514db422f296cd4c66f59ffa650a36a88685f6 100644 (file)
@@ -687,10 +687,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             ssh_options += ['-i', tkey.name]
 
         self._temp_files = temp_files
-        if ssh_options:
-            self._ssh_options = ' '.join(ssh_options)  # type: Optional[str]
-        else:
-            self._ssh_options = None
+        ssh_options += ['-o', 'ServerAliveInterval=7', '-o', 'ServerAliveCountMax=3']
+        self._ssh_options = ' '.join(ssh_options)  # type: Optional[str]
 
         if self.mode == 'root':
             self.ssh_user = self.get_store('ssh_user', default='root')
index 939b50bd30b9abdff01e46296b0bd0611937efe3..9112fdbd97940753a71d142737e068673d41a2b1 100644 (file)
@@ -1265,7 +1265,15 @@ class CephadmServe:
                 if stdin:
                     self.log.debug('stdin: %s' % stdin)
 
-                python = connr.choose_python()
+                try:
+                    # if host has gone offline this is likely where we'll fail first
+                    python = connr.choose_python()
+                except RuntimeError as e:
+                    self.mgr.offline_hosts.add(host)
+                    self.mgr._reset_con(host)
+                    if error_ok:
+                        return [], [str(e)], 1
+                    raise
                 if not python:
                     raise RuntimeError(
                         'unable to find python on %s (tried %s in %s)' % (