From 6ebd3d7b6f9a0c8c0d3377f38f96533fe537343c Mon Sep 17 00:00:00 2001 From: Lucian Petrut Date: Fri, 13 Nov 2020 12:18:32 +0000 Subject: [PATCH] compat,msg: improve Windows socket checks win_socketpair can fail with EADDRINUSE under load, which will lead to an unhandled exception/crash as per this commit [1]. This change adds a retry, also ensuring that the right error code gets propagated (the one returned by WSAGetLastError() instead of the generic SOCKET_ERROR). While at it, we're fixing the "win_socketpair" indentation and addressing the SOCKET to int casts. [1] https://github.com/ceph/ceph/commit/633805060a1002c86570fe50d099e0c1e223e2d7 Signed-off-by: Lucian Petrut --- src/common/compat.cc | 146 ++++++++++++++++++++++++++--------------- src/msg/async/Event.cc | 6 +- 2 files changed, 94 insertions(+), 58 deletions(-) diff --git a/src/common/compat.cc b/src/common/compat.cc index 0e8d944a73c..d4f30d54c24 100644 --- a/src/common/compat.cc +++ b/src/common/compat.cc @@ -415,63 +415,101 @@ CEPH_CONSTRUCTOR(ceph_windows_init) { } } -int win_socketpair(int socks[2]) +int _win_socketpair(int socks[2]) { - union { - struct sockaddr_in inaddr; - struct sockaddr addr; - } a; - int listener; - int e; - socklen_t addrlen = sizeof(a.inaddr); - int reuse = 1; - - if (socks == 0) { - errno = -EINVAL; - return SOCKET_ERROR; - } + union { + struct sockaddr_in inaddr; + struct sockaddr addr; + } a; + SOCKET listener; + int e; + socklen_t addrlen = sizeof(a.inaddr); + int reuse = 1; + + if (socks == 0) { + WSASetLastError(WSAEINVAL); + return -1; + } + + listener = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (listener == INVALID_SOCKET) { + return -1; + } + + memset(&a, 0, sizeof(a)); + a.inaddr.sin_family = AF_INET; + a.inaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + a.inaddr.sin_port = 0; + + socks[0] = socks[1] = -1; + SOCKET s[2] = { INVALID_SOCKET, INVALID_SOCKET }; + + do { + if (setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, + (char*) &reuse, (socklen_t) sizeof(reuse)) == -1) + break; + if (bind(listener, &a.addr, sizeof(a.inaddr)) == SOCKET_ERROR) + break; + if (getsockname(listener, &a.addr, &addrlen) == SOCKET_ERROR) + break; + if (listen(listener, 1) == SOCKET_ERROR) + break; + s[0] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (s[0] == INVALID_SOCKET) + break; + if (connect(s[0], &a.addr, sizeof(a.inaddr)) == SOCKET_ERROR) + break; + s[1] = accept(listener, NULL, NULL); + if (s[1] == INVALID_SOCKET) + break; - listener = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - if (listener == INVALID_SOCKET) - return SOCKET_ERROR; - - memset(&a, 0, sizeof(a)); - a.inaddr.sin_family = AF_INET; - a.inaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - a.inaddr.sin_port = 0; - - socks[0] = socks[1] = INVALID_SOCKET; - do { - if (setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, - (char*) &reuse, (socklen_t) sizeof(reuse)) == -1) - break; - if (bind(listener, &a.addr, sizeof(a.inaddr)) == SOCKET_ERROR) - break; - if (getsockname(listener, &a.addr, &addrlen) == SOCKET_ERROR) - break; - if (listen(listener, 1) == SOCKET_ERROR) - break; - socks[0] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - if (socks[0] == INVALID_SOCKET) - break; - if (connect(socks[0], &a.addr, sizeof(a.inaddr)) == SOCKET_ERROR) - break; - socks[1] = accept(listener, NULL, NULL); - if (socks[1] == INVALID_SOCKET) - break; - - closesocket(listener); - - return 0; - - } while (0); - - e = errno; closesocket(listener); - closesocket(socks[0]); - closesocket(socks[1]); - errno = e; - return SOCKET_ERROR; + + // The Windows socket API is mostly compatible with the Berkeley + // API, with a few exceptions. The Windows socket functions use + // SOCKET instead of int. The issue is that on x64 systems, + // SOCKET uses 64b while int uses 32b. There's been much debate + // whether casting a Windows socket to an int is safe or not. + // Worth noting that Windows kernel objects use 32b. For now, + // we're just adding a check. + // + // Ideally, we should update ceph to use the right type but this + // can be quite difficult, especially considering that there are + // a significant number of functions that accept both sockets and + // file descriptors. + if (s[0] >> 32 || s[1] >> 32) { + WSASetLastError(WSAENAMETOOLONG); + break; + } + + socks[0] = s[0]; + socks[1] = s[1]; + + return 0; + + } while (0); + + e = WSAGetLastError(); + closesocket(listener); + closesocket(s[0]); + closesocket(s[1]); + WSASetLastError(e); + return -1; +} + +int win_socketpair(int socks[2]) { + int r = 0; + for (int i = 0; i < 15; i++) { + r = _win_socketpair(socks); + if (r && WSAGetLastError() == WSAEADDRINUSE) { + sleep(2); + continue; + } + else { + break; + } + } + return r; } unsigned get_page_size() { diff --git a/src/msg/async/Event.cc b/src/msg/async/Event.cc index 1e109230282..2c545c07b37 100644 --- a/src/msg/async/Event.cc +++ b/src/msg/async/Event.cc @@ -148,16 +148,14 @@ int EventCenter::init(int nevent, unsigned center_id, const std::string &type) int fds[2]; #ifdef _WIN32 - r = win_socketpair(fds); - if (r != 0) - return -r; + if (win_socketpair(fds) < 0) { #else if (pipe_cloexec(fds, 0) < 0) { + #endif int e = ceph_sock_errno(); lderr(cct) << __func__ << " can't create notify pipe: " << cpp_strerror(e) << dendl; return -e; } - #endif notify_receive_fd = fds[0]; notify_send_fd = fds[1]; -- 2.39.5