From 61b9432ef9a3847eceb96f8d5a854567c49bbf61 Mon Sep 17 00:00:00 2001 From: xie xingguo Date: Wed, 27 Feb 2019 15:34:46 +0800 Subject: [PATCH] msg/async/Protocol*: send keep alive if existing wins This is a follow-up fix to https://github.com/ceph/ceph/pull/25754. It turns out the existing connection would win the connection race and then be stuck in the __replacing__ stage forever without being aware that the underlying Pipe is actually broken. Fix by forcing the existing connection to send keepalives periodically __too__. Fixes: http://tracker.ceph.com/issues/38493 Signed-off-by: xie xingguo --- src/msg/async/ProtocolV1.cc | 2 ++ src/msg/async/ProtocolV2.cc | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/msg/async/ProtocolV1.cc b/src/msg/async/ProtocolV1.cc index dac1bbd5840..80d2b8e26b7 100644 --- a/src/msg/async/ProtocolV1.cc +++ b/src/msg/async/ProtocolV1.cc @@ -1998,6 +1998,8 @@ CtPtr ProtocolV1::handle_connect_message_2() { << " existing_state=" << connection->get_state_name(existing->state) << dendl; reply.global_seq = exproto->peer_global_seq; + // make sure we notice if existing connection is no longer functioning + existing->send_keepalive(); existing->lock.unlock(); return send_connect_message_reply(CEPH_MSGR_TAG_RETRY_GLOBAL, reply, authorizer_reply); diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc index cf700b601d7..ebf6dcea407 100644 --- a/src/msg/async/ProtocolV2.cc +++ b/src/msg/async/ProtocolV2.cc @@ -2464,6 +2464,8 @@ CtPtr ProtocolV2::handle_reconnect(ceph::bufferlist &payload) ldout(cct, 1) << __func__ << " existing racing replace happened while replacing." 
<< " existing=" << existing << dendl; + // make sure we notice if existing connection is no longer functioning + existing->send_keepalive(); auto retry = RetryGlobalFrame::Encode(session_stream_handlers, exproto->peer_global_seq); return WRITE(retry, "session retry", read_frame); @@ -2571,6 +2573,8 @@ CtPtr ProtocolV2::handle_existing_connection(AsyncConnectionRef existing) { ldout(cct, 1) << __func__ << " existing racing replace happened while replacing." << " existing=" << existing << dendl; + // make sure we notice if existing connection is no longer functioning + existing->send_keepalive(); auto wait = WaitFrame::Encode(session_stream_handlers); return WRITE(wait, "wait", read_frame); } -- 2.39.5