From e1d147fa7ae73502817563cf86988f7397964a34 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 4 Jan 2019 12:11:04 -0600 Subject: [PATCH] osd: support osd_numa_node, osd_numa_auto_affinity, osd_numa_prefer_iface - osd_numa_node manually specifies a numa node. - osd_numa_prefer_iface makes us prefer IPs in public_network that are on the same numa node as the storage - osd_numa_auto_affinity will set affinity to a numa node when both the store and network(s) are on the same numa node. These options are all flagged as 'startup', although osd_numa_node and osd_numa_auto_affinity take effect when the OSD is marked up, so doing 'ceph osd down ...' is sufficient to induce an update. Signed-off-by: Sage Weil --- src/ceph_osd.cc | 19 ++++++++- src/common/options.cc | 17 +++++++++ src/osd/OSD.cc | 89 +++++++++++++++++++++++++++++++++++++++---- src/osd/OSD.h | 5 +++ 4 files changed, 121 insertions(+), 9 deletions(-) diff --git a/src/ceph_osd.cc b/src/ceph_osd.cc index 7a00a81730720..0a4814cf54137 100644 --- a/src/ceph_osd.cc +++ b/src/ceph_osd.cc @@ -35,6 +35,7 @@ #include "common/Timer.h" #include "common/TracepointProvider.h" #include "common/ceph_argparse.h" +#include "common/numa.h" #include "global/global_init.h" #include "global/signal_handler.h" @@ -471,6 +472,18 @@ flushjournal_out: forker.exit(0); } + // consider objectstore numa node + int os_numa_node = -1; + r = store->get_numa_node(&os_numa_node, nullptr, nullptr); + if (r >= 0 && os_numa_node >= 0) { + dout(1) << " objectstore numa_node " << os_numa_node << dendl; + } + int iface_preferred_numa_node = -1; + if (g_conf().get_val("osd_numa_prefer_iface")) { + iface_preferred_numa_node = os_numa_node; + } + + // messengers std::string msg_type = g_conf().get_val("ms_type"); std::string public_msg_type = g_conf().get_val("ms_public_type"); @@ -561,12 +574,14 @@ flushjournal_out: ms_objecter->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX)); entity_addrvec_t public_addrs, 
cluster_addrs; - r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC, &public_addrs); + r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC, &public_addrs, + iface_preferred_numa_node); if (r < 0) { derr << "Failed to pick public address." << dendl; forker.exit(1); } - r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_CLUSTER, &cluster_addrs); + r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_CLUSTER, &cluster_addrs, + iface_preferred_numa_node); if (r < 0) { derr << "Failed to pick cluster address." << dendl; forker.exit(1); diff --git a/src/common/options.cc b/src/common/options.cc index d54da1e8adf51..0b1038100a406 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -2173,6 +2173,23 @@ std::vector