git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
msg/async: close STATE_WAIT connection in short period
author: Haomai Wang <haomai@xsky.com>
Sun, 19 Jun 2016 15:42:36 +0000 (23:42 +0800)
committer: Haomai Wang <haomai@xsky.com>
Sun, 19 Jun 2016 18:18:05 +0000 (02:18 +0800)
1. In practice, a STATE_WAIT connection caused by a racing connect should be
resolved within milliseconds, so we don't need to keep such a connection
around forever.
2. This avoids unexpected OSD peering hangs caused by outside network
problems.

Fixes: http://tracker.ceph.com/issues/16378
Signed-off-by: Haomai Wang <haomai@xsky.com>
src/msg/async/AsyncConnection.cc

index 229d2e06bb9e30a48adf83933b8a78a9f6d22916..71ea16848885c391758f61eb0aba7ec50974bb77 100644 (file)
@@ -932,8 +932,8 @@ void AsyncConnection::process()
 
       case STATE_WAIT:
         {
-          ldout(async_msgr->cct, 20) << __func__ << " enter wait state" << dendl;
-          break;
+          ldout(async_msgr->cct, 1) << __func__ << " enter wait state, failing" << dendl;
+          goto fail;
         }
 
       default:
@@ -2132,7 +2132,8 @@ void AsyncConnection::fault()
   }
 
   write_lock.Unlock();
-  if (!(state >= STATE_CONNECTING && state < STATE_CONNECTING_READY)) {
+  if (!(state >= STATE_CONNECTING && state < STATE_CONNECTING_READY) &&
+      state != STATE_WAIT) { // STATE_WAIT is coming from STATE_CONNECTING_*
     // policy maybe empty when state is in accept
     if (policy.server) {
       ldout(async_msgr->cct, 0) << __func__ << " server, going to standby" << dendl;
@@ -2143,8 +2144,11 @@ void AsyncConnection::fault()
       state = STATE_CONNECTING;
     }
     backoff = utime_t();
+    center->dispatch_event_external(read_handler);
   } else {
-    if (backoff == utime_t()) {
+    if (state == STATE_WAIT) {
+      backoff.set_from_double(async_msgr->cct->_conf->ms_max_backoff);
+    } else if (backoff == utime_t()) {
       backoff.set_from_double(async_msgr->cct->_conf->ms_initial_backoff);
     } else {
       backoff += backoff;
@@ -2154,11 +2158,10 @@ void AsyncConnection::fault()
 
     state = STATE_CONNECTING;
     ldout(async_msgr->cct, 10) << __func__ << " waiting " << backoff << dendl;
+    // woke up again;
+    register_time_events.insert(center->create_time_event(
+            backoff.to_nsec()/1000, wakeup_handler));
   }
-
-  // woke up again;
-  register_time_events.insert(center->create_time_event(
-          backoff.to_nsec()/1000, wakeup_handler));
 }
 
 void AsyncConnection::was_session_reset()