SMC-D is faster than Ethernet in IBM Z LPARs and/or VMs (z/VM or KVM).
Fixes: https://tracker.ceph.com/issues/66702
Signed-off-by: Aliaksei Makarau <aliaksei.makarau@ibm.com>
level: advanced
desc: Messenger implementation to use for network communication
fmt_desc: Transport type used by Async Messenger. Can be ``async+posix``,
- ``async+dpdk`` or ``async+rdma``. Posix uses standard TCP/IP networking and is
+ ``async+dpdk``, ``async+rdma``, or ``async+smc``. Posix uses standard TCP/IP networking and is the
default. Other transports may be experimental and support may be limited.
default: async+posix
flags:
transport_type = "rdma";
else if (type.find("dpdk") != std::string::npos)
transport_type = "dpdk";
+ else if (type.find("smc") != std::string::npos)
+ transport_type = "smc";
auto single = &cct->lookup_or_create_singleton_object<StackSingleton>(
"AsyncMessenger::NetworkStack::" + transport_type, true, cct);
return 0;
}
-PosixNetworkStack::PosixNetworkStack(CephContext *c)
- : NetworkStack(c)
+PosixNetworkStack::PosixNetworkStack(CephContext *c, bool try_smc)
+ : NetworkStack(c), try_smc(try_smc)
{
}
ceph::NetHandler net;
void initialize() override;
public:
- PosixWorker(CephContext *c, unsigned i)
- : Worker(c, i), net(c) {}
+ PosixWorker(CephContext *c, unsigned i, bool try_smc)
+ : Worker(c, i), net(c, try_smc) {}
int listen(entity_addr_t &sa,
unsigned addr_slot,
const SocketOptions &opt,
class PosixNetworkStack : public NetworkStack {
std::vector<std::thread> threads;
+ bool try_smc;
virtual Worker* create_worker(CephContext *c, unsigned worker_id) override {
- return new PosixWorker(c, worker_id);
+ return new PosixWorker(c, worker_id, try_smc);
}
public:
- explicit PosixNetworkStack(CephContext *c);
+ explicit PosixNetworkStack(CephContext *c, bool try_smc);
void spawn_worker(std::function<void ()> &&func) override {
threads.emplace_back(std::move(func));
std::shared_ptr<NetworkStack> stack = nullptr;
if (t == "posix")
- stack.reset(new PosixNetworkStack(c));
+ stack.reset(new PosixNetworkStack(c, false));
+ else if (t == "smc")
+ stack.reset(new PosixNetworkStack(c, true));
#ifdef HAVE_RDMA
else if (t == "rdma")
stack.reset(new RDMAStack(c));
#undef dout_prefix
#define dout_prefix *_dout << "NetHandler "
+#ifndef SMCPROTO_SMC
+ #define SMCPROTO_SMC 0 /* fallback if not already defined: socket() protocol used with AF_SMC when the requested domain is AF_INET (IPv4) */
+ #define SMCPROTO_SMC6 1 /* fallback if not already defined: socket() protocol used with AF_SMC when the requested domain is AF_INET6 (IPv6) */
+#endif
+
namespace ceph{
int NetHandler::create_socket(int domain, bool reuse_addr)
{
int s;
int r = 0;
+ int protocol = IPPROTO_TCP;
+
+#if defined(AF_SMC)
+ if (this->try_smc) {
+ /* check if socket is eligible for AF_SMC */
+ if (domain == AF_INET || domain == AF_INET6) {
+ if (domain == AF_INET)
+ protocol = SMCPROTO_SMC;
+ else /* AF_INET6 */
+ protocol = SMCPROTO_SMC6;
+ domain = AF_SMC;
+ }
+ }
+#endif
- if ((s = socket_cloexec(domain, SOCK_STREAM, 0)) == -1) {
+ if ((s = socket_cloexec(domain, SOCK_STREAM, protocol)) == -1) {
r = ceph_sock_errno();
lderr(cct) << __func__ << " couldn't create socket " << cpp_strerror(r) << dendl;
return -r;
int generic_connect(const entity_addr_t& addr, const entity_addr_t& bind_addr, bool nonblock);
CephContext *cct;
+ bool try_smc;
public:
int create_socket(int domain, bool reuse_addr=false);
- explicit NetHandler(CephContext *c): cct(c) {}
+ explicit NetHandler(CephContext *c, bool try_smc=false): cct(c), try_smc(try_smc) {
+ }
int set_nonblock(int sd);
int set_socket_options(int sd, bool nodelay, int size);
int connect(const entity_addr_t &addr, const entity_addr_t& bind_addr);