git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
msg/async/Protocol: send keepalive on connection race winner 25754/head
author Sage Weil <sage@redhat.com>
Wed, 2 Jan 2019 21:59:56 +0000 (15:59 -0600)
committer Sage Weil <sage@redhat.com>
Fri, 4 Jan 2019 19:46:47 +0000 (13:46 -0600)
If we win a connection race, we could still hit a fault before we finish
connecting, and then go into standby.  Ensure that we fully connect so that
we can send our keepalive and the other end is able to send us whatever it
has to send.

Fixes: http://tracker.ceph.com/issues/37779
Signed-off-by: Sage Weil <sage@redhat.com>
src/msg/async/ProtocolV1.cc
src/msg/async/ProtocolV2.cc

index ab4b526105f1ca5d82a285adc34e2e28e747d8c4..4b97158f1831f01cb3889a733ed6dfdfa81f7425 100644 (file)
@@ -176,7 +176,8 @@ void ProtocolV1::fault() {
 
   reset_recv_state();
 
-  if (connection->policy.standby && out_q.empty() && state != WAIT) {
+  if (connection->policy.standby && out_q.empty() && !keepalive &&
+      state != WAIT) {
     ldout(cct, 10) << __func__ << " with nothing to send, going to standby"
                    << dendl;
     state = STANDBY;
@@ -2099,6 +2100,10 @@ CtPtr ProtocolV1::handle_connect_message_2() {
         ceph_assert(connection->peer_addrs.legacy_addr() >
                     messenger->get_myaddr());
         existing->lock.unlock();
+       // make sure we follow through with opening the existing
+       // connection (if it isn't yet open) since we know the peer
+       // has something to send to us.
+       existing->send_keepalive();
         return send_connect_message_reply(CEPH_MSGR_TAG_WAIT, reply,
                                           authorizer_reply);
       }
index 99b424e6dbc3c1aea9a0d4704b64d8aa695966e3..b884203421da1dc52b66625efb5b1ecd1d23b635 100644 (file)
@@ -173,7 +173,8 @@ void ProtocolV2::fault() {
 
   reset_recv_state();
 
-  if (connection->policy.standby && out_q.empty() && state != WAIT) {
+  if (connection->policy.standby && out_q.empty() && !keepalive &&
+      state != WAIT) {
     ldout(cct, 10) << __func__ << " with nothing to send, going to standby"
                    << dendl;
     state = STANDBY;
@@ -2147,7 +2148,11 @@ CtPtr ProtocolV2::handle_connect_message_2() {
         ceph_assert(connection->peer_addrs.legacy_addr() >
                     messenger->get_myaddr());
         existing->lock.unlock();
-        return send_connect_message_reply(CEPH_MSGR_TAG_WAIT, reply,
+       // make sure we follow through with opening the existing
+       // connection (if it isn't yet open) since we know the peer
+       // has something to send to us.
+       existing->send_keepalive();
+       return send_connect_message_reply(CEPH_MSGR_TAG_WAIT, reply,
                                           authorizer_reply);
       }
     }