From: Dai zhiwei Date: Thu, 31 Oct 2019 01:29:59 +0000 (+0800) Subject: osd/OSD: enhance osd numa affinity compatibility X-Git-Tag: v15.1.0~701^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=637fdea62b08c4d675a280e2218bcef1039ba25d;p=ceph-ci.git osd/OSD: enhance osd numa affinity compatibility add bond network numa affinity compatibility support add subnet compatibility support add public and cluster network numa nodes not matching log Fixes: https://tracker.ceph.com/issues/42411 Signed-off-by: Dai zhiwei --- diff --git a/src/common/pick_address.cc b/src/common/pick_address.cc index 4e9f6174a04..e6da4248b50 100644 --- a/src/common/pick_address.cc +++ b/src/common/pick_address.cc @@ -25,6 +25,9 @@ #include "common/numa.h" #include +#include +#include +#include #define dout_subsys ceph_subsys_ @@ -508,15 +511,29 @@ int get_iface_numa_node( const std::string& iface, int *node) { - string fn = std::string("/sys/class/net/") + iface + "/device/numa_node"; + int ifatype = IFACE_DEFAULT; + string ifa = iface; + int pos = ifa.find(":"); + if (pos != string::npos) { + ifa.erase(pos); + } + string fn = std::string("/sys/class/net/") + ifa + "/device/numa_node"; + int fd = ::open(fn.c_str(), O_RDONLY); + if (fd < 0) { + fn = std::string("/sys/class/net/") + ifa + "/bonding/slaves"; + fd = ::open(fn.c_str(), O_RDONLY); + if (fd < 0) { + return -errno; + } + ifatype = IFACE_BOND_PORT; + } else { + ifatype = IFACE_PHY_PORT; + } int r = 0; char buf[1024]; char *endptr = 0; - int fd = ::open(fn.c_str(), O_RDONLY); - if (fd < 0) { - return -errno; - } + int bond_node = -1; r = safe_read(fd, &buf, sizeof(buf)); if (r < 0) { goto out; @@ -525,13 +542,43 @@ int get_iface_numa_node( while (r > 0 && ::isspace(buf[--r])) { buf[r] = 0; } - *node = strtoll(buf, &endptr, 10); - if (endptr != buf + strlen(buf)) { - r = -EINVAL; - goto out; + + switch (ifatype) { + case IFACE_PHY_PORT: + *node = strtoll(buf, &endptr, 10); + if (endptr != buf + strlen(buf)) { + r = -EINVAL; + goto out; + } + r = 0; + break; + case IFACE_BOND_PORT: + std::vector sv; + char *q, *p = strtok_r(buf, " ", &q); + while (p != NULL) { + sv.push_back(p); + p = strtok_r(NULL, " ", &q); + } + for (auto& iter : sv) { + int bn = -1; + r = get_iface_numa_node(iter, &bn); + if (r >= 0) { + if (bond_node == -1 || bn == bond_node) { + bond_node = bn; + } else { + *node = -2; + goto out; + } + } else { + goto out; + } + } + *node = bond_node; + break; } - r = 0; - out: + + out: ::close(fd); return r; } + diff --git a/src/common/pick_address.h b/src/common/pick_address.h index aa87e7c51eb..ba9473a9649 100644 --- a/src/common/pick_address.h +++ b/src/common/pick_address.h @@ -20,6 +20,12 @@ class entity_addrvec_t; #define CEPH_PICK_ADDRESS_PREFER_IPV4 0x40 #define CEPH_PICK_ADDRESS_DEFAULT_MON_PORTS 0x80 +enum IfaceType { + IFACE_DEFAULT = 0, + IFACE_PHY_PORT = 1, + IFACE_BOND_PORT = 2 +}; + #ifndef WITH_SEASTAR /* Pick addresses based on subnets if needed. diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 175d495383f..d52fb2bf6ea 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2269,11 +2269,11 @@ int OSD::set_numa_affinity() cct, cluster_messenger->get_myaddrs().front().get_sockaddr_storage()); int r = get_iface_numa_node(front_iface, &front_node); - if (r >= 0) { + if (r >= 0 && front_node >= 0) { dout(1) << __func__ << " public network " << front_iface << " numa node " - << front_node << dendl; + << front_node << dendl; r = get_iface_numa_node(back_iface, &back_node); - if (r >= 0) { + if (r >= 0 && back_node >= 0) { dout(1) << __func__ << " cluster network " << back_iface << " numa node " << back_node << dendl; if (front_node == back_node && @@ -2282,14 +2282,23 @@ int OSD::set_numa_affinity() if (g_conf().get_val("osd_numa_auto_affinity")) { numa_node = front_node; } + } else if (front_node != back_node) { + dout(1) << __func__ << " public and cluster network numa nodes do not match" + << dendl; } else { dout(1) << __func__ << " objectstore and network numa nodes do not match" << dendl; } + } else if (back_node == -2) { + dout(1) << __func__ << " cluster network " << back_iface + << " ports numa nodes do not match" << dendl; } else { derr << __func__ << " unable to identify cluster interface '" << back_iface << "' numa node: " << cpp_strerror(r) << dendl; } + } else if (front_node == -2) { + dout(1) << __func__ << " public network " << front_iface + << " ports numa nodes do not match" << dendl; } else { derr << __func__ << " unable to identify public interface '" << front_iface << "' numa node: " << cpp_strerror(r) << dendl;