From 7209cc6aa3263d1dfb5cb19d57dd1f6b56aa2804 Mon Sep 17 00:00:00 2001 From: xie xingguo Date: Sat, 2 Mar 2019 16:23:12 +0800 Subject: [PATCH] msg/async: add timeout for connections which are not yet ready There could be various corner cases that may cause an async connection to get stuck in the connecting stage (e.g., by manually creating some loopback connections on the switches of our test cluster, we can almost 100% reproduce http://tracker.ceph.com/issues/37499). In 61b9432ef9a3847eceb96f8d5a854567c49bbf61 I tried to employ the existing keep_alive mechanism to get those stuck connections out of the trap, but it does not work if the corresponding connection is not yet ready, since we always require the underlying connection to be **ready** in order to send out a keep_alive message. Fix by introducing a more general connecting-timeout strategy. If a connecting process cannot be finished within a specific interval, then we simply cut it off and retry. Fixes: http://tracker.ceph.com/issues/37499 Fixes: http://tracker.ceph.com/issues/38493 Signed-off-by: xie xingguo --- src/common/legacy_config_opts.h | 1 + src/common/options.cc | 5 ++++ src/msg/async/AsyncConnection.cc | 44 ++++++++++++++++++++++++++------ src/msg/async/AsyncConnection.h | 2 ++ src/msg/async/ProtocolV1.cc | 6 +++++ src/msg/async/ProtocolV2.cc | 6 +++++ 6 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/common/legacy_config_opts.h b/src/common/legacy_config_opts.h index 7fe64976cee..52a259a23b9 100644 --- a/src/common/legacy_config_opts.h +++ b/src/common/legacy_config_opts.h @@ -120,6 +120,7 @@ OPTION(ms_bind_retry_count, OPT_INT) // If binding fails, how many times do we r OPTION(ms_bind_retry_delay, OPT_INT) // Delay between attempts to bind OPTION(ms_bind_before_connect, OPT_BOOL) OPTION(ms_tcp_listen_backlog, OPT_INT) +OPTION(ms_connection_ready_timeout, OPT_U64) OPTION(ms_connection_idle_timeout, OPT_U64) OPTION(ms_pq_max_tokens_per_priority, OPT_U64) OPTION(ms_pq_min_cost, OPT_U64) diff 
--git a/src/common/options.cc b/src/common/options.cc index 1e6aa4b5fca..51a3c49ef6d 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -955,6 +955,11 @@ std::vector