]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
msgr: choose incoming connection if ours is STANDBY
authorSage Weil <sage@inktank.com>
Sun, 1 Jul 2012 22:37:31 +0000 (15:37 -0700)
committerSage Weil <sage@inktank.com>
Tue, 3 Jul 2012 00:54:00 +0000 (17:54 -0700)
If the connect_seq matches, but our existing connection is in STANDBY, take
the incoming one.  Otherwise, the other end will wait indefinitely for us
to connect but we won't.

Alternatively, we could "win" the race and trigger a connection by sending
a keepalive (or similar), but that is more work; we may as well accept the
incoming connection we have now.

This removes STANDBY from the acceptable WAIT case states.  It also keeps
responsibility squarely on the shoulders of the peer with something to
deliver.

Without this patch, a 3-osd vstart cluster with
'ms inject socket failures = 100' and rados bench write -b 4096 would start
generating slow request warnings after a few minutes due to the osds
failing to connect to each other.  With the patch, I complete a 10 minute
run without problems.

Signed-off-by: Sage Weil <sage@inktank.com>
src/msg/SimpleMessenger.cc

index 3514de7fcf28179d72c13263467c7ec6bf8b6d43..06f0bc84950bf3c731878d8f15654cbe7b348bae 100644 (file)
@@ -755,10 +755,11 @@ int SimpleMessenger::Pipe::accept()
       if (connect.connect_seq == existing->connect_seq) {
        // connection race?
        if (peer_addr < msgr->my_inst.addr ||
-           existing->policy.server) {
+           existing->policy.server ||
+           existing->state == STATE_STANDBY) {
          // incoming wins
          ldout(msgr->cct,10) << "accept connection race, existing " << existing << ".cseq " << existing->connect_seq
-                  << " == " << connect.connect_seq << ", or we are server, replacing my attempt" << dendl;
+                  << " == " << connect.connect_seq << ", or STANDBY, or we are server, replacing my attempt" << dendl;
          if (!(existing->state == STATE_CONNECTING ||
                existing->state == STATE_STANDBY ||
                existing->state == STATE_WAIT))
@@ -782,8 +783,7 @@ int SimpleMessenger::Pipe::accept()
                             << " == " << connect.connect_seq
                             << dendl;
          assert(existing->state == STATE_CONNECTING ||
-                existing->state == STATE_OPEN ||
-                existing->state == STATE_STANDBY);
+                existing->state == STATE_OPEN);
          reply.tag = CEPH_MSGR_TAG_WAIT;
          existing->pipe_lock.Unlock();
          msgr->lock.Unlock();