From 1e30d6f624921ae98c27981d55e06c29d1b0fdd0 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Thu, 7 May 2020 15:10:46 -0700 Subject: [PATCH] teuthology/orchestra/connection: add retry exceptions Previously, only AuthenticationException was retried when connecting; other typical connection errors (socket errors, NoValidConnectionsError, timeouts) would cause a command to fail. Retry on those errors as well. Fixes: https://tracker.ceph.com/issues/45438 Signed-off-by: Patrick Donnelly --- teuthology/orchestra/connection.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/teuthology/orchestra/connection.py b/teuthology/orchestra/connection.py index 20a3b8fb24..0e5ce88b1c 100644 --- a/teuthology/orchestra/connection.py +++ b/teuthology/orchestra/connection.py @@ -4,13 +4,23 @@ Connection utilities import base64 import paramiko import os +import socket import logging +from paramiko import AuthenticationException +from paramiko.ssh_exception import NoValidConnectionsError + from teuthology.config import config from teuthology.contextutil import safe_while log = logging.getLogger(__name__) +RECONNECT_EXCEPTIONS = ( + socket.error, + AuthenticationException, + NoValidConnectionsError, +) + def split_user(user_at_host): """ @@ -107,8 +117,14 @@ def connect(user_at_host, host_key=None, keep_alive=False, timeout=60, try: ssh.connect(**connect_args) break - except paramiko.AuthenticationException: - log.exception( - "Error connecting to {host}".format(host=host)) + except RECONNECT_EXCEPTIONS as e: + log.debug("Error connecting to {host}: {e}".format(host=host,e=e)) + except Exception as e: + # gevent.__hub_primitives returns a generic Exception, *sigh* + if "timed out" in str(e): + log.debug("Error connecting to {host}: {e}".format(host=host,e=e)) + else: + raise + ssh.get_transport().set_keepalive(keep_alive) return ssh -- 2.39.5