From 5a6910bfbab3defb5b5b665c004cc761d422f5c2 Mon Sep 17 00:00:00 2001 From: Ricardo Dias Date: Wed, 20 Feb 2019 11:22:03 +0000 Subject: [PATCH] msg/async: msgr2: resolve reconnect races Signed-off-by: Ricardo Dias --- src/msg/async/ProtocolV2.cc | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc index 8e5cad257cecf..c523e3635aeaa 100644 --- a/src/msg/async/ProtocolV2.cc +++ b/src/msg/async/ProtocolV2.cc @@ -2782,7 +2782,7 @@ CtPtr ProtocolV2::handle_reconnect(char *payload, uint32_t length) { return WRITE(retry.get_buffer(), "session retry", read_frame); } - if (exproto->connect_seq >= reconnect.connect_seq()) { + if (exproto->connect_seq > reconnect.connect_seq()) { ldout(cct, 5) << __func__ << " stale connect_seq scs=" << exproto->connect_seq << " ccs=" << reconnect.connect_seq() @@ -2791,6 +2791,28 @@ CtPtr ProtocolV2::handle_reconnect(char *payload, uint32_t length) { return WRITE(retry.get_buffer(), "session retry", read_frame); } + if (exproto->connect_seq == reconnect.connect_seq()) { + // reconnect race: both peers are sending reconnect messages + if (existing->peer_addrs->msgr2_addr() > + messenger->get_myaddrs().msgr2_addr() && + !existing->policy.server) { + // the existing connection wins + ldout(cct, 1) + << __func__ + << " reconnect race detected, this connection loses to existing=" + << existing << dendl; + + WaitFrame wait; + return WRITE(wait.get_buffer(), "wait", read_frame); + } else { + // this connection wins + ldout(cct, 1) << __func__ + << " reconnect race detected, replacing existing=" + << existing << " socket by this connection's socket" + << dendl; + } + } + ldout(cct, 1) << __func__ << " reconnect to existing=" << existing << dendl; reconnecting = true; -- 2.39.5