From: Lucian Petrut
Date: Tue, 17 Nov 2020 13:12:59 +0000 (+0000)
Subject: msg: use timeout when initiating connection
X-Git-Tag: v16.1.0~514^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F38161%2Fhead;p=ceph.git

msg: use timeout when initiating connection

There's a timer that enforces a connection timeout but it isn't scoped
properly. It's started only after the ``connect`` request completes.
Because of that, connections can hang in STATE_CONNECTING_RE state
indefinitely if the "connect" operation doesn't complete.

This change will start the timer when entering the STATE_CONNECTING
state.

Signed-off-by: Lucian Petrut
---

diff --git a/src/msg/async/AsyncConnection.cc b/src/msg/async/AsyncConnection.cc
index 8b2a4a2b7b3a..5769c580e074 100644
--- a/src/msg/async/AsyncConnection.cc
+++ b/src/msg/async/AsyncConnection.cc
@@ -384,8 +384,10 @@ void AsyncConnection::process() {
       // clear timer (if any) since we are connecting/re-connecting
       if (last_tick_id) {
         center->delete_time_event(last_tick_id);
-        last_tick_id = 0;
       }
+      last_connect_started = ceph::coarse_mono_clock::now();
+      last_tick_id = center->create_time_event(
+          connect_timeout_us, tick_handler);
 
       if (cs) {
         center->delete_file_event(cs.fd(), EVENT_READABLE | EVENT_WRITABLE);
@@ -432,11 +434,6 @@ void AsyncConnection::process() {
       ldout(async_msgr->cct, 10)
           << __func__ << " connect successfully, ready to send banner" << dendl;
       state = STATE_CONNECTION_ESTABLISHED;
-      ceph_assert(last_tick_id == 0);
-      // exclude TCP nonblock connect time
-      last_connect_started = ceph::coarse_mono_clock::now();
-      last_tick_id = center->create_time_event(
-          connect_timeout_us, tick_handler);
       break;
     }
 