OPTION(ms_async_rdma_receive_buffers, OPT_U32, 10240)
OPTION(ms_async_rdma_port_num, OPT_U32, 1)
OPTION(ms_async_rdma_polling_us, OPT_U32, 1000)
+OPTION(ms_async_rdma_local_gid, OPT_STR, "") // local GID searched for in the port's GID table; format: "fe80:0000:0000:0000:7efe:90ff:fe72:6efe" (8 colon-separated groups of 4 hex digits), no zero folding
+OPTION(ms_async_rdma_roce_ver, OPT_INT, 2) // compared against the GID entry's type during lookup: 2=RoCEv2, 1=RoCEv1.5, 0=RoCEv1
+OPTION(ms_async_rdma_sl, OPT_INT, 3) // service level assigned to the QP address handle (ah_attr.sl); in RoCE, this means PCP
OPTION(ms_dpdk_port_id, OPT_INT, 0)
OPTION(ms_dpdk_coremask, OPT_STR, "1")
}\r
}\r
\r
+/**\r
+ * Query the attributes of port `ipn` on the device context `ictxt` and select\r
+ * the local GID this port will use.\r
+ *\r
+ * @param cct    Ceph context; supplies logging and the ms_async_rdma_local_gid /\r
+ *               ms_async_rdma_roce_ver settings.\r
+ * @param ictxt  verbs device context the port belongs to.\r
+ * @param ipn    port number handed to the verbs queries.\r
+ *\r
+ * On return `lid` holds the port LID, `gid_idx` the selected GID table index\r
+ * and `gid` the GID stored at that index.\r
+ *\r
+ * GID selection:\r
+ *  - ms_async_rdma_local_gid empty: GID index 0 is used (the behaviour before\r
+ *    GID lookup was introduced).\r
+ *  - otherwise the string is parsed as 16 hex bytes in 8 colon-separated\r
+ *    groups and the GID table is scanned for an entry with that value whose\r
+ *    type equals ms_async_rdma_roce_ver.\r
+ *\r
+ * Any verbs query failure, a malformed GID string, or a requested GID that is\r
+ * not present in the table is logged and ends in ceph_abort().\r
+ */\r
+Port::Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn): ctxt(ictxt), port_num(ipn), port_attr(new ibv_port_attr) {\r
+  union ibv_gid cgid;\r
+  struct ibv_exp_gid_attr gid_attr;\r
+\r
+  // Never hand indeterminate bytes to memcmp() or to the verbs library.\r
+  memset(&cgid, 0, sizeof(cgid));\r
+  memset(&gid_attr, 0, sizeof(gid_attr));\r
+\r
+  // ibv_query_port() reports failure with a non-zero return (the errno value),\r
+  // so testing for -1 only would let errors slip through.\r
+  int r = ibv_query_port(ctxt, port_num, port_attr);\r
+  if (r != 0) {\r
+    lderr(cct) << __func__ << " query port failed " << cpp_strerror(r > 0 ? r : errno) << dendl;\r
+    ceph_abort();\r
+  }\r
+\r
+  lid = port_attr->lid;\r
+\r
+  const std::string &wanted_gid = cct->_conf->ms_async_rdma_local_gid;\r
+\r
+  if (wanted_gid.empty()) {\r
+    // No GID requested: keep using the first GID table entry.\r
+    ldout(cct, 1) << __func__ << " no local GID supplied, using GID index 0" << dendl;\r
+    gid_idx = 0;\r
+    r = ibv_query_gid(ctxt, port_num, gid_idx, &gid);\r
+    if (r) {\r
+      lderr(cct) << __func__ << " query gid of port " << (int)port_num << " index " << gid_idx << " failed " << cpp_strerror(errno) << dendl;\r
+      ceph_abort();\r
+    }\r
+    return;\r
+  }\r
+\r
+  // search for requested GID in GIDs table\r
+  ldout(cct, 1) << __func__ << " looking for local GID " << wanted_gid\r
+    << " of type " << (cct->_conf->ms_async_rdma_roce_ver) << dendl;\r
+  r = sscanf(wanted_gid.c_str(),\r
+             "%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx"\r
+             ":%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx",\r
+             &cgid.raw[ 0], &cgid.raw[ 1],\r
+             &cgid.raw[ 2], &cgid.raw[ 3],\r
+             &cgid.raw[ 4], &cgid.raw[ 5],\r
+             &cgid.raw[ 6], &cgid.raw[ 7],\r
+             &cgid.raw[ 8], &cgid.raw[ 9],\r
+             &cgid.raw[10], &cgid.raw[11],\r
+             &cgid.raw[12], &cgid.raw[13],\r
+             &cgid.raw[14], &cgid.raw[15]);\r
+  if (r != 16) {\r
+    // All 16 bytes must convert; a partial parse would compare against junk.\r
+    lderr(cct) << __func__ << " malformed local GID " << wanted_gid << dendl;\r
+    ceph_abort();\r
+  }\r
+\r
+  gid_attr.comp_mask = IBV_EXP_QUERY_GID_ATTR_TYPE;\r
+\r
+  for (gid_idx = 0; gid_idx < port_attr->gid_tbl_len; gid_idx++) {\r
+    r = ibv_query_gid(ctxt, port_num, gid_idx, &gid);\r
+    if (r) {\r
+      lderr(cct) << __func__ << " query gid of port " << (int)port_num << " index " << gid_idx << " failed " << cpp_strerror(errno) << dendl;\r
+      ceph_abort();\r
+    }\r
+    r = ibv_exp_query_gid_attr(ctxt, port_num, gid_idx, &gid_attr);\r
+    if (r) {\r
+      lderr(cct) << __func__ << " query gid attributes of port " << (int)port_num << " index " << gid_idx << " failed " << cpp_strerror(errno) << dendl;\r
+      ceph_abort();\r
+    }\r
+    // An entry matches only if both its type and its 16 raw bytes agree.\r
+    if ( (gid_attr.type == cct->_conf->ms_async_rdma_roce_ver) &&\r
+         (memcmp(gid.raw, cgid.raw, sizeof(gid.raw)) == 0) ) {\r
+      ldout(cct, 1) << __func__ << " found at index " << gid_idx << dendl;\r
+      break;\r
+    }\r
+  }\r
+\r
+  // Loop ran off the end of the table: the requested GID does not exist here.\r
+  if (gid_idx == port_attr->gid_tbl_len) {\r
+    lderr(cct) << __func__ << " Requested local GID was not found in GID table" << dendl;\r
+    ceph_abort();\r
+  }\r
+ }\r
+\r
void Device::binding_port(CephContext *cct, uint8_t port_num) {\r
port_cnt = device_attr->phys_port_cnt;\r
ports = new Port*[port_cnt];\r
struct ibv_context* ctxt;\r
uint8_t port_num;\r
struct ibv_port_attr* port_attr;\r
- int gid_tbl_len;\r
uint16_t lid;\r
+ int gid_idx;\r
union ibv_gid gid;\r
\r
public:\r
- explicit Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn): ctxt(ictxt), port_num(ipn), port_attr(new ibv_port_attr) {\r
- int r = ibv_query_port(ctxt, port_num, port_attr);\r
- if (r == -1) {\r
- lderr(cct) << __func__ << " query port failed " << cpp_strerror(errno) << dendl;\r
- ceph_abort();\r
- }\r
-\r
- lid = port_attr->lid;\r
- r = ibv_query_gid(ctxt, port_num, 0, &gid);\r
- if (r) {\r
- lderr(cct) << __func__ << " query gid failed " << cpp_strerror(errno) << dendl;\r
- ceph_abort();\r
- }\r
- }\r
-\r
+ explicit Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn);\r
uint16_t get_lid() { return lid; }\r
ibv_gid get_gid() { return gid; }\r
uint8_t get_port_num() { return port_num; }\r
ibv_port_attr* get_port_attr() { return port_attr; }\r
+ // Index into the port's GID table that the constructor selected for this port.\r
+ int get_gid_idx() { return gid_idx; }\r
};\r
\r
\r
const char* get_name() { return name;}\r
uint16_t get_lid() { return active_port->get_lid(); }\r
ibv_gid get_gid() { return active_port->get_gid(); }\r
+ // GID table index of the active port (forwards to Port::get_gid_idx()).\r
+ int get_gid_idx() { return active_port->get_gid_idx(); }\r
void binding_port(CephContext *c, uint8_t port_num);\r
struct ibv_context *ctxt;\r
ibv_device_attr *device_attr;\r
qpa.ah_attr.is_global = 1;\r
qpa.ah_attr.grh.hop_limit = 6;\r
qpa.ah_attr.grh.dgid = peer_msg.gid;\r
- qpa.ah_attr.grh.sgid_index = 0;\r
+\r
+ qpa.ah_attr.grh.sgid_index = infiniband->get_device()->get_gid_idx();\r
\r
qpa.ah_attr.dlid = peer_msg.lid;\r
- qpa.ah_attr.sl = 0;\r
+ qpa.ah_attr.sl = cct->_conf->ms_async_rdma_sl;\r
qpa.ah_attr.src_path_bits = 0;\r
qpa.ah_attr.port_num = (uint8_t)(infiniband->get_ib_physical_port());\r
\r
+ ldout(cct, 20) << __func__ << " Choosing gid_index " << (int)qpa.ah_attr.grh.sgid_index << ", sl " << (int)qpa.ah_attr.sl << dendl;\r
+\r
r = ibv_modify_qp(qp->get_qp(), &qpa, IBV_QP_STATE |\r
IBV_QP_AV |\r
IBV_QP_PATH_MTU |\r