git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
msg/async: close STATE_WAIT connection in short period 9996/head
author Haomai Wang <haomai@xsky.com>
Sun, 19 Jun 2016 15:42:36 +0000 (23:42 +0800)
committer Loic Dachary <ldachary@redhat.com>
Wed, 29 Jun 2016 08:19:45 +0000 (10:19 +0200)
1. In practice, a STATE_WAIT connection caused by a connect race should be
resolved within milliseconds, so we don't need to keep this connection
forever.
2. This avoids unexpected OSD peering hangs caused by external network
problems.

Fixes: http://tracker.ceph.com/issues/16378
Signed-off-by: Haomai Wang <haomai@xsky.com>
(cherry picked from commit 4de5407ac96686748497253e4daf51177f809a95)

src/msg/async/AsyncConnection.cc

index ed7d0cf7686d3a349d64b198fe4b57b66afd618a..9aec6f4e475f5a77ce80ca7b285d293f1c052c57 100644 (file)
@@ -978,8 +978,8 @@ void AsyncConnection::process()
 
       case STATE_WAIT:
         {
-          ldout(async_msgr->cct, 20) << __func__ << " enter wait state" << dendl;
-          break;
+          ldout(async_msgr->cct, 1) << __func__ << " enter wait state, failing" << dendl;
+          goto fail;
         }
 
       default:
@@ -2200,7 +2200,8 @@ void AsyncConnection::fault()
   }
 
   write_lock.Unlock();
-  if (!(state >= STATE_CONNECTING && state < STATE_CONNECTING_READY)) {
+  if (!(state >= STATE_CONNECTING && state < STATE_CONNECTING_READY) &&
+      state != STATE_WAIT) { // STATE_WAIT is coming from STATE_CONNECTING_*
     // policy maybe empty when state is in accept
     if (policy.server) {
       ldout(async_msgr->cct, 0) << __func__ << " server, going to standby" << dendl;
@@ -2211,8 +2212,11 @@ void AsyncConnection::fault()
       state = STATE_CONNECTING;
     }
     backoff = utime_t();
+    center->dispatch_event_external(read_handler);
   } else {
-    if (backoff == utime_t()) {
+    if (state == STATE_WAIT) {
+      backoff.set_from_double(async_msgr->cct->_conf->ms_max_backoff);
+    } else if (backoff == utime_t()) {
       backoff.set_from_double(async_msgr->cct->_conf->ms_initial_backoff);
     } else {
       backoff += backoff;
@@ -2222,11 +2226,10 @@ void AsyncConnection::fault()
 
     state = STATE_CONNECTING;
     ldout(async_msgr->cct, 10) << __func__ << " waiting " << backoff << dendl;
+    // woke up again;
+    register_time_events.insert(center->create_time_event(
+            backoff.to_nsec()/1000, wakeup_handler));
   }
-
-  // woke up again;
-  register_time_events.insert(center->create_time_event(
-          backoff.to_nsec()/1000, wakeup_handler));
 }
 
 void AsyncConnection::was_session_reset()