From a666c06688269b91974d957a757452012d304f08 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Mon, 18 Mar 2019 09:55:46 +0800 Subject: [PATCH] msg/async v2: make v2 work on rdma. When exec " MON=1 OSD=1 RGW=0 MDS=0 MGR=0 ../src/vstart.sh -b -n -o "ms_type=async+rdma" -o "ms_async_rdma_device_name=mlx5_1" --msgr2 -o "debug ms=2/2" ", met the following error messages: 2019-03-18 10:04:17.074 7f6644dd8700 1 --2- >> v2:10.239.48.9:40998/0 conn(0x7f66401691f0 0x7f6640169630 crc :-1 s=SESSION_CONNECTING pgs=0 cs=0 l=0 rx=0 tx=0).send_client_ident getsockname reveals I am : when talking to v2:10.239.48.9:40998/0 /home/ceph/jp-ceph/src/msg/msg_types.h: In function 'void entity_addr_t::set_port(int)' thread 7f6644dd8700 time 2019-03-18 10:04:17.079278 /home/ceph/jp-ceph/src/msg/msg_types.h: 363: abort() ceph version 14.1.1-196-g9c024bafa3 (9c024bafa3570dee401d3fcaf04754376f285eca) nautilus (rc) 1: (ceph::__ceph_abort(char const*, int, char const*, std::__cxx11::basic_string, std::allocator > const&)+0xfe) [0x7f664cc6e896] 2: (entity_addr_t::set_port(int)+0xaa) [0x7f664cc3a724] 3: (ProtocolV2::send_client_ident()+0x3ba) [0x7f664d005270] 4: (ProtocolV2::finish_client_auth()+0x4c) [0x7f664d004d8e] 5: (ProtocolV2::handle_auth_signature(ceph::buffer::v14_2_0::list&)+0x612) [0x7f664d00ad70] 6: (ProtocolV2::handle_frame_payload()+0x275) [0x7f664cffde79] 7: (ProtocolV2::handle_read_frame_dispatch()+0x158) [0x7f664cffcbea] 8: (ProtocolV2::handle_read_frame_epilogue_main(std::unique_ptr&&, int)+0x80d) [0x7f664cffef77] 9: (CtRxNode::call(ProtocolV2*) const+0x8c) [0x7f664d02ac40] 10: (ProtocolV2::run_continuation(Ct&)+0x5e) [0x7f664cff144e] 11: (()+0x175a34b) [0x7f664cff834b] 12: (()+0x1774e00) [0x7f664d012e00] 13: (std::function::operator()(char*, long) const+0x61) [0x7f664cfb6269] 14: (AsyncConnection::process()+0xc77) [0x7f664cfb16f7] 15: (C_handle_read::do_request(unsigned long)+0x28) [0x7f664cfb5472] 16: (EventCenter::process_events(unsigned int, std::chrono::duration >*)+0x678) 
[0x7f664d02e4cc] 17: (()+0x179dbf4) [0x7f664d03bbf4] 18: (()+0x179f0c7) [0x7f664d03d0c7] 19: (std::function::operator()() const+0x32) [0x7f664d03a44a] 20: (void std::__invoke_impl>(std::__invoke_other, std::function&&)+0x20) [0x7f664d039ce7] 21: (std::__invoke_result>::type std::__invoke>(std::function&&)+0x26) [0x7f664d039474] 22: (decltype (__invoke((_S_declval<0ul>)())) std::thread::_Invoker > >::_M_invoke<0ul>(std::_Index_tuple<0ul>)+0x28) [0x7f664d03b3aa] 23: (std::thread::_Invoker > >::operator()()+0x1d) [0x7f664d03b37b] 24: (std::thread::_State_impl > > >::_M_run()+0x1c) [0x7f664d03b35a] 25: (()+0xbd57f) [0x7f664af5257f] 26: (()+0x76db) [0x7f6657d996db] 27: (clone()+0x3f) [0x7f66580d288f] This is because getsockname can't work on connection->cs.fd(). So add a new API, socket_fd(), for the rdma socket fd. Signed-off-by: Jianpeng Ma (cherry picked from commit de39c8534be644f760d7e7ccee7b782349237c1d) --- src/msg/async/PosixStack.cc | 3 +++ src/msg/async/ProtocolV2.cc | 3 ++- src/msg/async/Stack.h | 4 ++++ src/msg/async/dpdk/DPDKStack.h | 4 ++++ src/msg/async/rdma/RDMAStack.h | 1 + 5 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/msg/async/PosixStack.cc b/src/msg/async/PosixStack.cc index afd392bc089cf..2757ce8b545e3 100644 --- a/src/msg/async/PosixStack.cc +++ b/src/msg/async/PosixStack.cc @@ -161,6 +161,9 @@ class PosixConnectedSocketImpl final : public ConnectedSocketImpl { int fd() const override { return _fd; } + int socket_fd() const override { + return _fd; + } friend class PosixServerSocketImpl; friend class PosixNetworkStack; }; diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc index 8a90ffcaec537..a631d56dcd542 100644 --- a/src/msg/async/ProtocolV2.cc +++ b/src/msg/async/ProtocolV2.cc @@ -1823,7 +1823,8 @@ CtPtr ProtocolV2::send_client_ident() { messenger->get_myaddrs().front().is_blank_ip()) { sockaddr_storage ss; socklen_t len = sizeof(ss); - getsockname(connection->cs.fd(), (sockaddr *)&ss, &len); + int r = 
getsockname(connection->cs.socket_fd(), (sockaddr *)&ss, &len); + ceph_assert(r == 0); ldout(cct, 1) << __func__ << " getsockname reveals I am " << (sockaddr *)&ss << " when talking to " << connection->target_addr << dendl; entity_addr_t a; diff --git a/src/msg/async/Stack.h b/src/msg/async/Stack.h index bccdaab5e65e6..a093dadbc64a4 100644 --- a/src/msg/async/Stack.h +++ b/src/msg/async/Stack.h @@ -33,6 +33,7 @@ class ConnectedSocketImpl { virtual void shutdown() = 0; virtual void close() = 0; virtual int fd() const = 0; + virtual int socket_fd() const = 0; }; class ConnectedSocket; @@ -129,6 +130,9 @@ class ConnectedSocket { int fd() const { return _csi->fd(); } + int socket_fd() const { + return _csi->socket_fd(); + } explicit operator bool() const { return _csi.get(); diff --git a/src/msg/async/dpdk/DPDKStack.h b/src/msg/async/dpdk/DPDKStack.h index cc374af938479..a44ae38367f9d 100644 --- a/src/msg/async/dpdk/DPDKStack.h +++ b/src/msg/async/dpdk/DPDKStack.h @@ -176,6 +176,10 @@ class NativeConnectedSocketImpl : public ConnectedSocketImpl { virtual int fd() const override { return _conn.fd(); } + virtual int socket_fd() const override { + return _conn.fd(); + } + }; template diff --git a/src/msg/async/rdma/RDMAStack.h b/src/msg/async/rdma/RDMAStack.h index c10a5389bcf3a..e038d3625986a 100644 --- a/src/msg/async/rdma/RDMAStack.h +++ b/src/msg/async/rdma/RDMAStack.h @@ -221,6 +221,7 @@ class RDMAConnectedSocketImpl : public ConnectedSocketImpl { virtual void shutdown() override; virtual void close() override; virtual int fd() const override { return notify_fd; } + virtual int socket_fd() const override { return tcp_fd; } void fault(); const char* get_qp_state() { return Infiniband::qp_state_string(qp->get_state()); } ssize_t submit(bool more); -- 2.39.5