git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
teuthology/orchestra/connection: add retry exceptions 1466/head
author: Patrick Donnelly <pdonnell@redhat.com>
Thu, 7 May 2020 22:10:46 +0000 (15:10 -0700)
committer: Patrick Donnelly <pdonnell@redhat.com>
Tue, 12 May 2020 21:37:04 +0000 (14:37 -0700)
Previously, typical connection errors (e.g. socket errors or no valid
connections) would cause a command to fail; they are now retried.

Fixes: https://tracker.ceph.com/issues/45438
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
teuthology/orchestra/connection.py

index 20a3b8fb2496af84c87ad4d401babffd3eb06f94..0e5ce88b1c3d483419307b330b95e892582e94f7 100644 (file)
@@ -4,13 +4,23 @@ Connection utilities
 import base64
 import paramiko
 import os
+import socket
 import logging
 
+from paramiko import AuthenticationException
+from paramiko.ssh_exception import NoValidConnectionsError
+
 from teuthology.config import config
 from teuthology.contextutil import safe_while
 
 log = logging.getLogger(__name__)
 
+RECONNECT_EXCEPTIONS = (
+  socket.error,
+  AuthenticationException,
+  NoValidConnectionsError,
+)
+
 
 def split_user(user_at_host):
     """
@@ -107,8 +117,14 @@ def connect(user_at_host, host_key=None, keep_alive=False, timeout=60,
                 try:
                     ssh.connect(**connect_args)
                     break
-                except paramiko.AuthenticationException:
-                    log.exception(
-                        "Error connecting to {host}".format(host=host))
+                except RECONNECT_EXCEPTIONS as e:
+                    log.debug("Error connecting to {host}: {e}".format(host=host,e=e))
+                except Exception as e:
+                    # gevent.__hub_primitives returns a generic Exception, *sigh*
+                    if "timed out" in str(e):
+                        log.debug("Error connecting to {host}: {e}".format(host=host,e=e))
+                    else:
+                        raise
+
     ssh.get_transport().set_keepalive(keep_alive)
     return ssh