]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
msg/async/Protocol*: send keep alive if existing wins
author: xie xingguo <xie.xingguo@zte.com.cn>
Wed, 27 Feb 2019 07:34:46 +0000 (15:34 +0800)
committer: xie xingguo <xie.xingguo@zte.com.cn>
Fri, 1 Mar 2019 01:31:35 +0000 (09:31 +0800)
This is a follow-up fix of https://github.com/ceph/ceph/pull/25754.

It turns out the existing connection would win the connection race
and then be stuck in the __replacing__ stage forever without being
aware that the underlying Pipe is actually broken.

Fix by forcing the existing connection to send keepalives periodically __too__.

Fixes: http://tracker.ceph.com/issues/38493
Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
src/msg/async/ProtocolV1.cc
src/msg/async/ProtocolV2.cc

index dac1bbd5840126cf9c5ed2bdff989cb5365d6db5..80d2b8e26b73f5d33d58f12888c8a13181a3663f 100644 (file)
@@ -1998,6 +1998,8 @@ CtPtr ProtocolV1::handle_connect_message_2() {
                     << " existing_state="
                     << connection->get_state_name(existing->state) << dendl;
       reply.global_seq = exproto->peer_global_seq;
+      // make sure we notice if existing connection is no longer functioning
+      existing->send_keepalive();
       existing->lock.unlock();
       return send_connect_message_reply(CEPH_MSGR_TAG_RETRY_GLOBAL, reply,
                                         authorizer_reply);
index cf700b601d7015c8f9f4c2615056d869ea428fa3..ebf6dcea4074af42b742d52453dd05b876526c11 100644 (file)
@@ -2464,6 +2464,8 @@ CtPtr ProtocolV2::handle_reconnect(ceph::bufferlist &payload)
     ldout(cct, 1) << __func__
                   << " existing racing replace happened while replacing."
                   << " existing=" << existing << dendl;
+    // make sure we notice if existing connection is no longer functioning
+    existing->send_keepalive();
     auto retry = RetryGlobalFrame::Encode(session_stream_handlers,
                                           exproto->peer_global_seq);
     return WRITE(retry, "session retry", read_frame);
@@ -2571,6 +2573,8 @@ CtPtr ProtocolV2::handle_existing_connection(AsyncConnectionRef existing) {
     ldout(cct, 1) << __func__
                   << " existing racing replace happened while replacing."
                   << " existing=" << existing << dendl;
+    // make sure we notice if existing connection is no longer functioning
+    existing->send_keepalive();
     auto wait = WaitFrame::Encode(session_stream_handlers);
     return WRITE(wait, "wait", read_frame);
   }