]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
msg/async/rdma: fix bug event center is blocked by rdma construct connection for...
author liupengs <liupeng37@baidu.com>
Sun, 17 Nov 2019 15:03:07 +0000 (23:03 +0800)
committer Vicente Cheng <freeze.bilsted@gmail.com>
Tue, 28 Apr 2020 02:55:21 +0000 (02:55 +0000)
We construct a TCP connection to transport the IB sync message. If the
remote node is shut down (e.g. by accident), net.connect will block until the timeout
is reached, which causes the event center to be blocked.

This bug may cause mon probe timeouts, OSDs failing to reply, and so on.

Signed-off-by: Peng Liu <liupeng37@baidu.com>
(cherry picked from commit 8b2a95011ca34ba3880440339693170a174034ab)

Conflicts:
src/msg/async/rdma/RDMAConnectedSocketImpl.cc
  - use the original send message handling in nautilus

src/msg/async/rdma/RDMAConnectedSocketImpl.cc
src/msg/async/rdma/RDMAStack.h

index 37a4eef4c03afddaac5e6827910e766a2a50225f..a2c072a035ed308bb41efc3d45064e08953dbb18 100644 (file)
  */
 #include "RDMAStack.h"
 
+class C_handle_connection_established : public EventCallback {
+  RDMAConnectedSocketImpl *csi;
+  bool active = true;
+ public:
+  C_handle_connection_established(RDMAConnectedSocketImpl *w) : csi(w) {}
+  void do_request(uint64_t fd) final {
+    if (active)
+      csi->handle_connection_established();
+  }
+  void close() {
+    active = false;
+  }
+};
+
+
 #define dout_subsys ceph_subsys_ms
 #undef dout_prefix
 #define dout_prefix *_dout << " RDMAConnectedSocketImpl "
@@ -24,6 +39,7 @@ RDMAConnectedSocketImpl::RDMAConnectedSocketImpl(CephContext *cct, Infiniband* i
   : cct(cct), connected(0), error(0), infiniband(ib),
     dispatcher(s), worker(w), lock("RDMAConnectedSocketImpl::lock"),
     is_server(false), con_handler(new C_handle_connection(this)),
+    established_handler(new C_handle_connection_established(this)),
     active(false), pending(false)
 {
   if (!cct->_conf->ms_async_rdma_cm) {
@@ -173,7 +189,14 @@ int RDMAConnectedSocketImpl::try_connect(const entity_addr_t& peer_addr, const S
   ldout(cct, 20) << __func__ << " nonblock:" << opts.nonblock << ", nodelay:"
                  << opts.nodelay << ", rbuf_size: " << opts.rcbuf_size << dendl;
   NetHandler net(cct);
-  tcp_fd = net.connect(peer_addr, opts.connect_bind_addr);
+
+  // we construct a socket to transport ib sync message
+  // but we shouldn't block in tcp connecting
+  if (opts.nonblock) {
+    tcp_fd = net.nonblock_connect(peer_addr, opts.connect_bind_addr);
+  } else {
+    tcp_fd = net.connect(peer_addr, opts.connect_bind_addr);
+  }
 
   if (tcp_fd < 0) {
     return -errno;
@@ -188,12 +211,38 @@ int RDMAConnectedSocketImpl::try_connect(const entity_addr_t& peer_addr, const S
 
   ldout(cct, 20) << __func__ << " tcp_fd: " << tcp_fd << dendl;
   net.set_priority(tcp_fd, opts.priority, peer_addr.get_family());
+  r = 0;
+  if (opts.nonblock) {
+    worker->center.create_file_event(tcp_fd, EVENT_READABLE | EVENT_WRITABLE , established_handler);
+  } else {
+    r = handle_connection_established(false);
+  }
+  return r;
+}
+
+int RDMAConnectedSocketImpl::handle_connection_established(bool need_set_fault) {
+  ldout(cct, 20) << __func__ << " start " << dendl;
+  // delete read event
+  worker->center.delete_file_event(tcp_fd, EVENT_READABLE | EVENT_WRITABLE);
+  if (1 == connected) {
+    ldout(cct, 1) << __func__ << " warnning: logic failed " << dendl;
+    if (need_set_fault) {
+      fault();
+    }
+    return -1;
+  }
+  // send handshake msg to server
   my_msg.peer_qpn = 0;
-  r = infiniband->send_msg(cct, tcp_fd, my_msg);
-  if (r < 0)
+  int r = infiniband->send_msg(cct, tcp_fd, my_msg);
+  if (r < 0) {
+    ldout(cct, 1) << __func__ << " send handshake msg failed." << r << dendl;
+    if (need_set_fault) {
+      fault();
+    }
     return r;
-
+  }
   worker->center.create_file_event(tcp_fd, EVENT_READABLE, con_handler);
+  ldout(cct, 20) << __func__ << " finish " << dendl;
   return 0;
 }
 
@@ -611,11 +660,16 @@ void RDMAConnectedSocketImpl::cleanup() {
   if (con_handler && tcp_fd >= 0) {
     (static_cast<C_handle_connection*>(con_handler))->close();
     worker->center.submit_to(worker->center.get_id(), [this]() {
-      worker->center.delete_file_event(tcp_fd, EVENT_READABLE);
+      worker->center.delete_file_event(tcp_fd, EVENT_READABLE | EVENT_WRITABLE);
     }, false);
     delete con_handler;
     con_handler = nullptr;
   }
+  if (established_handler) {
+    (static_cast<C_handle_connection_established*>(established_handler))->close();
+    delete established_handler;
+    established_handler = nullptr;
+  }
 }
 
 void RDMAConnectedSocketImpl::notify()
index e038d3625986aa173d9c588ec8532c55467613eb..3b6ed4e038c1657f3d5aaa0d683425a24e4ae636 100644 (file)
@@ -197,6 +197,7 @@ class RDMAConnectedSocketImpl : public ConnectedSocketImpl {
   std::vector<ibv_wc> wc;
   bool is_server;
   EventCallbackRef con_handler;
+  EventCallbackRef established_handler;
   int tcp_fd = -1;
   bool active;// qp is active ?
   bool pending;
@@ -228,6 +229,7 @@ class RDMAConnectedSocketImpl : public ConnectedSocketImpl {
   int activate();
   void fin();
   void handle_connection();
+  int handle_connection_established(bool need_set_fault = true);
   void cleanup();
   void set_accept_fd(int sd);
   virtual int try_connect(const entity_addr_t&, const SocketOptions &opt);
@@ -249,6 +251,7 @@ class RDMAConnectedSocketImpl : public ConnectedSocketImpl {
       active = false;
     }
   };
+  
 };
 
 enum RDMA_CM_STATUS {