From: Sage Weil Date: Wed, 2 Jan 2019 21:59:56 +0000 (-0600) Subject: msg/async/Protocol: send keepalive on connection race winner X-Git-Tag: v14.1.0~394^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4af95fc8474a5131b17880498f2f8d20b9932d45;p=ceph.git msg/async/Protocol: send keepalive on connection race winner If we win a connection race, we could still hit a fault before we finish connecting, and then go into standby. Ensure that we fully connect so that we can send our keepalive and the other end is able to send whatever it has pending to us. Fixes: http://tracker.ceph.com/issues/37779 Signed-off-by: Sage Weil --- diff --git a/src/msg/async/ProtocolV1.cc b/src/msg/async/ProtocolV1.cc index ab4b526105f1..4b97158f1831 100644 --- a/src/msg/async/ProtocolV1.cc +++ b/src/msg/async/ProtocolV1.cc @@ -176,7 +176,8 @@ void ProtocolV1::fault() { reset_recv_state(); - if (connection->policy.standby && out_q.empty() && state != WAIT) { + if (connection->policy.standby && out_q.empty() && !keepalive && + state != WAIT) { ldout(cct, 10) << __func__ << " with nothing to send, going to standby" << dendl; state = STANDBY; @@ -2099,6 +2100,10 @@ CtPtr ProtocolV1::handle_connect_message_2() { ceph_assert(connection->peer_addrs.legacy_addr() > messenger->get_myaddr()); existing->lock.unlock(); + // make sure we follow through with opening the existing + // connection (if it isn't yet open) since we know the peer + // has something to send to us. 
+ existing->send_keepalive(); return send_connect_message_reply(CEPH_MSGR_TAG_WAIT, reply, authorizer_reply); } diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc index 99b424e6dbc3..b884203421da 100644 --- a/src/msg/async/ProtocolV2.cc +++ b/src/msg/async/ProtocolV2.cc @@ -173,7 +173,8 @@ void ProtocolV2::fault() { reset_recv_state(); - if (connection->policy.standby && out_q.empty() && state != WAIT) { + if (connection->policy.standby && out_q.empty() && !keepalive && + state != WAIT) { ldout(cct, 10) << __func__ << " with nothing to send, going to standby" << dendl; state = STANDBY; @@ -2147,7 +2148,11 @@ CtPtr ProtocolV2::handle_connect_message_2() { ceph_assert(connection->peer_addrs.legacy_addr() > messenger->get_myaddr()); existing->lock.unlock(); - return send_connect_message_reply(CEPH_MSGR_TAG_WAIT, reply, + // make sure we follow through with opening the existing + // connection (if it isn't yet open) since we know the peer + // has something to send to us. + existing->send_keepalive(); + return send_connect_message_reply(CEPH_MSGR_TAG_WAIT, reply, authorizer_reply); } }