git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
msg/async: msgr2: resolve reconnect races
author Ricardo Dias <rdias@suse.com>
Wed, 20 Feb 2019 11:22:03 +0000 (11:22 +0000)
committer Ricardo Dias <rdias@suse.com>
Wed, 20 Feb 2019 13:36:13 +0000 (13:36 +0000)
Signed-off-by: Ricardo Dias <rdias@suse.com>
src/msg/async/ProtocolV2.cc

index 8e5cad257cecf61456736ad6f079bd2613f21957..c523e3635aeaad8c31bef23bec925d48c8818b78 100644 (file)
@@ -2782,7 +2782,7 @@ CtPtr ProtocolV2::handle_reconnect(char *payload, uint32_t length) {
     return WRITE(retry.get_buffer(), "session retry", read_frame);
   }
 
-  if (exproto->connect_seq >= reconnect.connect_seq()) {
+  if (exproto->connect_seq > reconnect.connect_seq()) {
     ldout(cct, 5) << __func__
                   << " stale connect_seq scs=" << exproto->connect_seq
                   << " ccs=" << reconnect.connect_seq()
@@ -2791,6 +2791,28 @@ CtPtr ProtocolV2::handle_reconnect(char *payload, uint32_t length) {
     return WRITE(retry.get_buffer(), "session retry", read_frame);
   }
 
+  if (exproto->connect_seq == reconnect.connect_seq()) {
+    // reconnect race: both peers are sending reconnect messages
+    if (existing->peer_addrs->msgr2_addr() >
+            messenger->get_myaddrs().msgr2_addr() &&
+        !existing->policy.server) {
+      // the existing connection wins
+      ldout(cct, 1)
+          << __func__
+          << " reconnect race detected, this connection loses to existing="
+          << existing << dendl;
+
+      WaitFrame wait;
+      return WRITE(wait.get_buffer(), "wait", read_frame);
+    } else {
+      // this connection wins
+      ldout(cct, 1) << __func__
+                    << " reconnect race detected, replacing existing="
+                    << existing << " socket by this connection's socket"
+                    << dendl;
+    }
+  }
+
   ldout(cct, 1) << __func__ << " reconnect to existing=" << existing << dendl;
 
   reconnecting = true;