From 5dfd2a512d309f7f641bcf7c43277f08cf650b01 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 1 Jul 2012 15:37:31 -0700 Subject: [PATCH] msgr: choose incoming connection if ours is STANDBY If the connect_seq matches, but our existing connection is in STANDBY, take the incoming one. Otherwise, the other end will wait indefinitely for us to connect but we won't. Alternatively, we could "win" the race and trigger a connection by sending a keepalive (or similar), but that is more work; we may as well accept the incoming connection we have now. This removes STANDBY from the acceptable WAIT case states. It also keeps responsibility squarely on the shoulders of the peer with something to deliver. Without this patch, a 3-osd vstart cluster with 'ms inject socket failures = 100' and rados bench write -b 4096 would start generating slow request warnings after a few minutes due to the osds failing to connect to each other. With the patch, I complete a 10 minute run without problems. Signed-off-by: Sage Weil --- src/msg/SimpleMessenger.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/msg/SimpleMessenger.cc b/src/msg/SimpleMessenger.cc index 3514de7fcf281..06f0bc84950bf 100644 --- a/src/msg/SimpleMessenger.cc +++ b/src/msg/SimpleMessenger.cc @@ -755,10 +755,11 @@ int SimpleMessenger::Pipe::accept() if (connect.connect_seq == existing->connect_seq) { // connection race? if (peer_addr < msgr->my_inst.addr || - existing->policy.server) { + existing->policy.server || + existing->state == STATE_STANDBY) { // incoming wins ldout(msgr->cct,10) << "accept connection race, existing " << existing << ".cseq " << existing->connect_seq - << " == " << connect.connect_seq << ", or we are server, replacing my attempt" << dendl; + << " == " << connect.connect_seq << ", or STANDBY, or we are server, replacing my attempt" << dendl; if (!(existing->state == STATE_CONNECTING || existing->state == STATE_STANDBY || existing->state == STATE_WAIT)) @@ -782,8 +783,7 @@ int SimpleMessenger::Pipe::accept() << " == " << connect.connect_seq << dendl; assert(existing->state == STATE_CONNECTING || - existing->state == STATE_OPEN || - existing->state == STATE_STANDBY); + existing->state == STATE_OPEN); reply.tag = CEPH_MSGR_TAG_WAIT; existing->pipe_lock.Unlock(); msgr->lock.Unlock(); -- 2.39.5