From 4be0235d740bb5e846fe015d18192bf9ff0a2037 Mon Sep 17 00:00:00 2001 From: Haomai Wang Date: Sat, 26 Aug 2017 22:09:54 -0700 Subject: [PATCH] msg/async: don't get stuck in resetsession/retrysession loop If the existing connection's connect_seq is zero, its state is STANDBY, and the peer connection is new, we reply with RETRYSESSION, which makes the peer connection increment its connect_seq. We then receive connect.connect_seq==1, but existing->connect_seq is still zero, so a RESETSESSION tag is sent in reply. The RESET->RETRY->RESET loop then repeats forever. Signed-off-by: Haomai Wang --- src/msg/async/AsyncConnection.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/msg/async/AsyncConnection.cc b/src/msg/async/AsyncConnection.cc index e7895ddca75..6941bcc8ed4 100644 --- a/src/msg/async/AsyncConnection.cc +++ b/src/msg/async/AsyncConnection.cc @@ -1609,6 +1609,11 @@ ssize_t AsyncConnection::handle_connect_msg(ceph_msg_connect &connect, bufferlis ldout(async_msgr->cct, 10) << __func__ << " accept connection race, existing " << existing << ".cseq " << existing->connect_seq << " == " << connect.connect_seq << ", OPEN|STANDBY, RETRY_SESSION" << dendl; + // if connect_seq both zero, dont stuck into dead lock. it's ok to replace + if (policy.resetcheck && existing->connect_seq == 0) { + goto replace; + } + reply.connect_seq = existing->connect_seq + 1; existing->lock.unlock(); return _reply_accept(CEPH_MSGR_TAG_RETRY_SESSION, connect, reply, authorizer_reply); -- 2.39.5