From: xie xingguo Date: Wed, 27 Feb 2019 07:34:46 +0000 (+0800) Subject: msg/async/Protocol*: send keep alive if existing wins X-Git-Tag: v14.1.1~76^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=61b9432ef9a3847eceb96f8d5a854567c49bbf61;p=ceph.git msg/async/Protocol*: send keep alive if existing wins This is a follow-up fix of https://github.com/ceph/ceph/pull/25754. It turns out the existing connection would win the connection race and then be stuck in the __replacing__ stage forever without being aware that the underlying Pipe is actually broken. Fix by forcing the existing connection to send keepalives periodically __too__. Fixes: http://tracker.ceph.com/issues/38493 Signed-off-by: xie xingguo --- diff --git a/src/msg/async/ProtocolV1.cc b/src/msg/async/ProtocolV1.cc index dac1bbd584012..80d2b8e26b73f 100644 --- a/src/msg/async/ProtocolV1.cc +++ b/src/msg/async/ProtocolV1.cc @@ -1998,6 +1998,8 @@ CtPtr ProtocolV1::handle_connect_message_2() { << " existing_state=" << connection->get_state_name(existing->state) << dendl; reply.global_seq = exproto->peer_global_seq; + // make sure we notice if existing connection is no longer functioning + existing->send_keepalive(); existing->lock.unlock(); return send_connect_message_reply(CEPH_MSGR_TAG_RETRY_GLOBAL, reply, authorizer_reply); diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc index cf700b601d701..ebf6dcea4074a 100644 --- a/src/msg/async/ProtocolV2.cc +++ b/src/msg/async/ProtocolV2.cc @@ -2464,6 +2464,8 @@ CtPtr ProtocolV2::handle_reconnect(ceph::bufferlist &payload) ldout(cct, 1) << __func__ << " existing racing replace happened while replacing." 
<< " existing=" << existing << dendl; + // make sure we notice if existing connection is no longer functioning + existing->send_keepalive(); auto retry = RetryGlobalFrame::Encode(session_stream_handlers, exproto->peer_global_seq); return WRITE(retry, "session retry", read_frame); @@ -2571,6 +2573,8 @@ CtPtr ProtocolV2::handle_existing_connection(AsyncConnectionRef existing) { ldout(cct, 1) << __func__ << " existing racing replace happened while replacing." << " existing=" << existing << dendl; + // make sure we notice if existing connection is no longer functioning + existing->send_keepalive(); auto wait = WaitFrame::Encode(session_stream_handlers); return WRITE(wait, "wait", read_frame); }