git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
msg/async: add timeout for connections which are not yet ready
author: xie xingguo <xie.xingguo@zte.com.cn>
Sat, 2 Mar 2019 08:23:12 +0000 (16:23 +0800)
committer: xie xingguo <xie.xingguo@zte.com.cn>
Mon, 8 Apr 2019 01:19:59 +0000 (09:19 +0800)
There could be various corner cases that may cause an async
connection stuck in the connecting stage (e.g., by manually
creating some loop back connections on the switches of our test cluster,
we can almost 100% reproduce http://tracker.ceph.com/issues/37499).

In 61b9432ef9a3847eceb96f8d5a854567c49bbf61 I tried to employ the
existing keep_alive mechanism to get those stuck connections out of the
trap, but it does not work if the corresponding connection
is not yet ready, since we always require the underlying connection to be
**ready** in order to send out a keep_alive message.

Fix by making a more general connecting timeout strategy.
If a connecting process can not be finished within a specific interval,
then we simply cut it off and retry.

Fixes: http://tracker.ceph.com/issues/37499
Fixes: http://tracker.ceph.com/issues/38493
Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
src/common/legacy_config_opts.h
src/common/options.cc
src/msg/async/AsyncConnection.cc
src/msg/async/AsyncConnection.h
src/msg/async/ProtocolV1.cc
src/msg/async/ProtocolV2.cc

index 7fe64976ceeac129d64367c19bedeb7162a87527..52a259a23b9fcb677890bc0de1d0501b690c2a8d 100644 (file)
@@ -120,6 +120,7 @@ OPTION(ms_bind_retry_count, OPT_INT) // If binding fails, how many times do we r
 OPTION(ms_bind_retry_delay, OPT_INT) // Delay between attempts to bind
 OPTION(ms_bind_before_connect, OPT_BOOL)
 OPTION(ms_tcp_listen_backlog, OPT_INT)
+OPTION(ms_connection_ready_timeout, OPT_U64)
 OPTION(ms_connection_idle_timeout, OPT_U64)
 OPTION(ms_pq_max_tokens_per_priority, OPT_U64)
 OPTION(ms_pq_min_cost, OPT_U64)
index 1e6aa4b5fca1b16c920cbe904b97f2f2066fcd18..51a3c49ef6d79316bd6714ea2f19d89a437fd64a 100644 (file)
@@ -955,6 +955,11 @@ std::vector<Option> get_global_options() {
     .set_default(512)
     .set_description("Size of queue of incoming connections for accept(2)"),
 
+
+    Option("ms_connection_ready_timeout", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description("Time before we declare a not yet ready connection as dead (seconds)"),
+
     Option("ms_connection_idle_timeout", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
     .set_default(900)
     .set_description("Time before an idle connection is closed (seconds)"),
index 7364473ed37eefcfcc2fa1d3bb0ca564a699a253..a364e58add6fece00923b6778b464bac7b91b846 100644 (file)
@@ -120,6 +120,7 @@ AsyncConnection::AsyncConnection(CephContext *cct, AsyncMessenger *m, DispatchQu
     recv_max_prefetch(std::max<int64_t>(msgr->cct->_conf->ms_tcp_prefetch_max_size, TCP_PREFETCH_MIN_SIZE)),
     recv_start(0), recv_end(0),
     last_active(ceph::coarse_mono_clock::now()),
+    connect_timeout_us(cct->_conf->ms_connection_ready_timeout*1000*1000),
     inactive_timeout_us(cct->_conf->ms_connection_idle_timeout*1000*1000),
     msgr2(m2), state_offset(0),
     worker(w), center(&w->center),read_buffer(nullptr)
@@ -373,6 +374,12 @@ void AsyncConnection::process() {
     case STATE_CONNECTING: {
       ceph_assert(!policy.server);
 
+      // clear timer (if any) since we are connecting/re-connecting
+      if (last_tick_id) {
+        center->delete_time_event(last_tick_id);
+        last_tick_id = 0;
+      }
+
       if (cs) {
         center->delete_file_event(cs.fd(), EVENT_READABLE | EVENT_WRITABLE);
         cs.close();
@@ -418,6 +425,11 @@ void AsyncConnection::process() {
       ldout(async_msgr->cct, 10)
           << __func__ << " connect successfully, ready to send banner" << dendl;
       state = STATE_CONNECTION_ESTABLISHED;
+      ceph_assert(last_tick_id == 0);
+      // exclude TCP nonblock connect time
+      last_connect_started = ceph::coarse_mono_clock::now();
+      last_tick_id = center->create_time_event(
+        connect_timeout_us, tick_handler);
       break;
     }
 
@@ -733,13 +745,29 @@ void AsyncConnection::tick(uint64_t id)
                              << " last_active=" << last_active << dendl;
   std::lock_guard<std::mutex> l(lock);
   last_tick_id = 0;
-  auto idle_period = std::chrono::duration_cast<std::chrono::microseconds>(now - last_active).count();
-  if (inactive_timeout_us < (uint64_t)idle_period) {
-    ldout(async_msgr->cct, 1) << __func__ << " idle(" << idle_period << ") more than "
-                              << inactive_timeout_us
-                              << " us, mark self fault." << dendl;
-    protocol->fault();
-  } else if (is_connected()) {
-    last_tick_id = center->create_time_event(inactive_timeout_us, tick_handler);
+  if (!is_connected()) {
+    if (connect_timeout_us <=
+        (uint64_t)std::chrono::duration_cast<std::chrono::microseconds>
+          (now - last_connect_started).count()) {
+      ldout(async_msgr->cct, 0) << __func__ << " see no progress in more than "
+                                << connect_timeout_us
+                                << " us during connecting, fault."
+                                << dendl;
+      protocol->fault();
+    } else {
+      last_tick_id = center->create_time_event(connect_timeout_us, tick_handler);
+    }
+  } else {
+    auto idle_period = std::chrono::duration_cast<std::chrono::microseconds>
+      (now - last_active).count();
+    if (inactive_timeout_us < (uint64_t)idle_period) {
+      ldout(async_msgr->cct, 1) << __func__ << " idle (" << idle_period
+                                << ") for more than " << inactive_timeout_us
+                                << " us, fault."
+                                << dendl;
+      protocol->fault();
+    } else {
+      last_tick_id = center->create_time_event(inactive_timeout_us, tick_handler);
+    }
   }
 }
index 7b5f080bc326c457928cffa7b9acf4f518cd13e9..5b914cc5e639118151dfa9567da0166c24a0a0ef 100644 (file)
@@ -186,9 +186,11 @@ class AsyncConnection : public Connection {
   uint32_t recv_start;
   uint32_t recv_end;
   set<uint64_t> register_time_events; // need to delete it if stop
+  ceph::coarse_mono_clock::time_point last_connect_started;
   ceph::coarse_mono_clock::time_point last_active;
   ceph::mono_clock::time_point recv_start_time;
   uint64_t last_tick_id = 0;
+  const uint64_t connect_timeout_us;
   const uint64_t inactive_timeout_us;
 
   // Tis section are temp variables used by state transition
index 1f56332f852d278fb94e5b679dd371db9cf43d8d..af6ff6b97ab552efdef0f7a2a63ae71d5d6783a0 100644 (file)
@@ -2294,6 +2294,12 @@ CtPtr ProtocolV1::replace(AsyncConnectionRef existing,
             if (exproto->state == CLOSED) return;
             ceph_assert(exproto->state == NONE);
 
+            // we have called shutdown_socket above
+            ceph_assert(existing->last_tick_id == 0);
+            // restart timer since we are going to re-build connection
+            existing->last_connect_started = ceph::coarse_mono_clock::now();
+            existing->last_tick_id = existing->center->create_time_event(
+              existing->connect_timeout_us, existing->tick_handler);
             existing->state = AsyncConnection::STATE_CONNECTION_ESTABLISHED;
             exproto->state = ACCEPTING;
 
index 5098288946547936dbbc59be0ef858eb205519ca..511b95d53eeb199a8aa6a81c03f3fa1367ef2632 100644 (file)
@@ -2708,6 +2708,12 @@ CtPtr ProtocolV2::reuse_connection(AsyncConnectionRef existing,
           ceph_assert(exproto->state == NONE);
 
           exproto->state = SESSION_ACCEPTING;
+          // we have called shutdown_socket above
+          ceph_assert(existing->last_tick_id == 0);
+          // restart timer since we are going to re-build connection
+          existing->last_connect_started = ceph::coarse_mono_clock::now();
+          existing->last_tick_id = existing->center->create_time_event(
+            existing->connect_timeout_us, existing->tick_handler);
           existing->state = AsyncConnection::STATE_CONNECTION_ESTABLISHED;
           existing->center->create_file_event(existing->cs.fd(), EVENT_READABLE,
                                               existing->read_handler);