From: Ricardo Dias
Date: Wed, 20 Feb 2019 11:22:03 +0000 (+0000)
Subject: msg/async: msgr2: resolve reconnect races
X-Git-Tag: v14.1.0~14^2~3
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5a6910bfbab3defb5b5b665c004cc761d422f5c2;p=ceph.git

msg/async: msgr2: resolve reconnect races

Signed-off-by: Ricardo Dias
---

diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc
index 8e5cad257cec..c523e3635aea 100644
--- a/src/msg/async/ProtocolV2.cc
+++ b/src/msg/async/ProtocolV2.cc
@@ -2782,7 +2782,7 @@ CtPtr ProtocolV2::handle_reconnect(char *payload, uint32_t length) {
     return WRITE(retry.get_buffer(), "session retry", read_frame);
   }
 
-  if (exproto->connect_seq >= reconnect.connect_seq()) {
+  if (exproto->connect_seq > reconnect.connect_seq()) {
     ldout(cct, 5) << __func__
                   << " stale connect_seq scs=" << exproto->connect_seq
                   << " ccs=" << reconnect.connect_seq()
@@ -2791,6 +2791,28 @@ CtPtr ProtocolV2::handle_reconnect(char *payload, uint32_t length) {
     return WRITE(retry.get_buffer(), "session retry", read_frame);
   }
 
+  if (exproto->connect_seq == reconnect.connect_seq()) {
+    // reconnect race: both peers are sending reconnect messages
+    if (existing->peer_addrs->msgr2_addr() >
+            messenger->get_myaddrs().msgr2_addr() &&
+        !existing->policy.server) {
+      // the existing connection wins
+      ldout(cct, 1)
+          << __func__
+          << " reconnect race detected, this connection loses to existing="
+          << existing << dendl;
+
+      WaitFrame wait;
+      return WRITE(wait.get_buffer(), "wait", read_frame);
+    } else {
+      // this connection wins
+      ldout(cct, 1) << __func__
+                    << " reconnect race detected, replacing existing="
+                    << existing << " socket by this connection's socket"
+                    << dendl;
+    }
+  }
+
   ldout(cct, 1) << __func__ << " reconnect to existing=" << existing << dendl;
 
   reconnecting = true;