git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: set affinity for *all* threads
author Sage Weil <sage@redhat.com>
Thu, 3 Oct 2019 15:04:19 +0000 (10:04 -0500)
committer Sage Weil <sage@redhat.com>
Thu, 3 Oct 2019 15:29:50 +0000 (10:29 -0500)
It turns out sched_setaffinity(2) only sets the current *thread*'s
affinity, not the entire process.  Look in /proc to identify all of our
threads and set the affinity of those, too.

Note that this doesn't seem to affect the numa_preferred_nid value in
/proc/$pid/[task/$tid/]sched... at least not immediately.  That value does
seem to change for some threads after some load is applied.

Fixes: https://tracker.ceph.com/issues/42054
Signed-off-by: Sage Weil <sage@redhat.com>
src/common/numa.cc
src/common/numa.h
src/osd/OSD.cc

index c75f50088eb10904f9172130030a73c97e6fd2cd..dc80d0f33bde85aefad01c78a50fdd2c57b4bdf6 100644 (file)
@@ -134,6 +134,56 @@ int get_numa_node_cpu_set(
   return r;
 }
 
+// Collect the names of all entries of directory `dir` into `*out`, skipping
+// the "." and ".." entries.
+//
+// Returns 0 on success, or a negative errno value if the directory cannot be
+// opened or an error occurs while reading it.  Names read before a failure
+// remain in `*out`.
+static int easy_readdir(const std::string& dir, std::set<std::string> *out)
+{
+  DIR *h = ::opendir(dir.c_str());
+  if (!h) {
+    return -errno;
+  }
+  int r = 0;
+  for (;;) {
+    // readdir(3) returns nullptr both at end-of-directory and on error; the
+    // two cases can only be told apart by clearing errno beforehand.
+    errno = 0;
+    struct dirent *de = ::readdir(h);
+    if (!de) {
+      r = -errno;  // 0 at end-of-directory
+      break;
+    }
+    if (strcmp(de->d_name, ".") == 0 ||
+        strcmp(de->d_name, "..") == 0) {
+      continue;
+    }
+    out->insert(de->d_name);
+  }
+  ::closedir(h);
+  return r;
+}
+
+// Apply the CPU affinity mask `cpu_set` (`cpu_set_size` bytes) to every
+// thread of the calling process.
+//
+// sched_setaffinity(2) only affects a single thread, so the process's threads
+// are enumerated from /proc/<pid>/task and the mask is applied to each one.
+//
+// Returns 0 on success or a negative errno value on failure.  Threads that
+// exit between being listed and being updated are skipped rather than treated
+// as an error.
+int set_cpu_affinity_all_threads(size_t cpu_set_size, cpu_set_t *cpu_set)
+{
+  // first set the affinity of the thread whose id equals our pid
+  int r = sched_setaffinity(getpid(), cpu_set_size, cpu_set);
+  if (r < 0) {
+    return -errno;
+  }
+
+  // the task directory does not change between passes
+  const std::string path = "/proc/"s + stringify(getpid()) + "/task";
+
+  // make 2 passes here so that we (hopefully) catch racing threads creating
+  // threads.
+  for (unsigned pass = 0; pass < 2; ++pass) {
+    // enumerate all child threads from /proc
+    std::set<std::string> ls;
+    r = easy_readdir(path, &ls);
+    if (r < 0) {
+      return r;
+    }
+    for (const auto& i : ls) {
+      pid_t tid = atoll(i.c_str());
+      if (!tid) {
+        continue;  // not a numeric task entry
+      }
+      r = sched_setaffinity(tid, cpu_set_size, cpu_set);
+      if (r < 0) {
+        if (errno == ESRCH) {
+          // the thread exited after we listed it; nothing left to pin
+          continue;
+        }
+        return -errno;
+      }
+    }
+  }
+  return 0;
+}
+
 #elif defined(__FreeBSD__)
 
 int parse_cpu_set_list(const char *s,
@@ -162,4 +212,10 @@ int get_numa_node_cpu_set(int node,
   return -ENOTSUP;
 }
 
+// Setting the affinity of all threads is not implemented in this branch of
+// the platform #if; always reports -ENOTSUP.
+int set_cpu_affinity_all_threads(size_t cpu_set_size, cpu_set_t *cpu_set)
+{
+  return -ENOTSUP;
+}
+
 #endif
index 4acb8280c8f4cb15528bdf06ac44bed45196260c..78851deef514870c718e533132215380c95fb47b 100644 (file)
@@ -19,3 +19,6 @@ std::set<int> cpu_set_to_set(size_t cpu_set_size,
 int get_numa_node_cpu_set(int node,
                          size_t *cpu_set_size,
                          cpu_set_t *cpu_set);
+
+int set_cpu_affinity_all_threads(size_t cpu_set_size,
+                                cpu_set_t *cpu_set);
index fc7f5654e521e98a3f0f5c624d781c1ffe71244d..d461f9f24eeb22eca75aebec10376d71c6f9e3f7 100644 (file)
@@ -2313,7 +2313,7 @@ int OSD::set_numa_affinity()
              << " cpus "
              << cpu_set_to_str_list(numa_cpu_set_size, &numa_cpu_set)
              << dendl;
-      r = sched_setaffinity(getpid(), numa_cpu_set_size, &numa_cpu_set);
+      r = set_cpu_affinity_all_threads(numa_cpu_set_size, &numa_cpu_set);
       if (r < 0) {
        r = -errno;
        derr << __func__ << " failed to set numa affinity: " << cpp_strerror(r)