From: Haomai Wang Date: Sun, 19 Jun 2016 15:42:36 +0000 (+0800) Subject: msg/async: close STATE_WAIT connection in short period X-Git-Tag: v10.2.3~100^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=edd3f799fb1e5d70244412c5a1be17f74397aae2;p=ceph.git msg/async: close STATE_WAIT connection in short period 1. In practice, a STATE_WAIT connection caused by a connect race should be resolved within milliseconds, so we don't need to keep this connection forever. 2. Closing it promptly avoids an unexpected OSD peering hang caused by external network problems. Fixes: http://tracker.ceph.com/issues/16378 Signed-off-by: Haomai Wang (cherry picked from commit 4de5407ac96686748497253e4daf51177f809a95) --- diff --git a/src/msg/async/AsyncConnection.cc b/src/msg/async/AsyncConnection.cc index ed7d0cf7686d..9aec6f4e475f 100644 --- a/src/msg/async/AsyncConnection.cc +++ b/src/msg/async/AsyncConnection.cc @@ -978,8 +978,8 @@ void AsyncConnection::process() case STATE_WAIT: { - ldout(async_msgr->cct, 20) << __func__ << " enter wait state" << dendl; - break; + ldout(async_msgr->cct, 1) << __func__ << " enter wait state, failing" << dendl; + goto fail; } default: @@ -2200,7 +2200,8 @@ void AsyncConnection::fault() } write_lock.Unlock(); - if (!(state >= STATE_CONNECTING && state < STATE_CONNECTING_READY)) { + if (!(state >= STATE_CONNECTING && state < STATE_CONNECTING_READY) && + state != STATE_WAIT) { // STATE_WAIT is coming from STATE_CONNECTING_* // policy maybe empty when state is in accept if (policy.server) { ldout(async_msgr->cct, 0) << __func__ << " server, going to standby" << dendl; @@ -2211,8 +2212,11 @@ void AsyncConnection::fault() state = STATE_CONNECTING; } backoff = utime_t(); + center->dispatch_event_external(read_handler); } else { - if (backoff == utime_t()) { + if (state == STATE_WAIT) { + backoff.set_from_double(async_msgr->cct->_conf->ms_max_backoff); + } else if (backoff == utime_t()) { 
backoff.set_from_double(async_msgr->cct->_conf->ms_initial_backoff); } else { backoff += backoff; @@ -2222,11 +2226,10 @@ void AsyncConnection::fault() state = STATE_CONNECTING; ldout(async_msgr->cct, 10) << __func__ << " waiting " << backoff << dendl; + // woke up again; + register_time_events.insert(center->create_time_event( + backoff.to_nsec()/1000, wakeup_handler)); } - - // woke up again; - register_time_events.insert(center->create_time_event( - backoff.to_nsec()/1000, wakeup_handler)); } void AsyncConnection::was_session_reset()