git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
test/crush/crush.cc: add tests specifically for MSR
author: Samuel Just <sjust@redhat.com>
Fri, 17 Nov 2023 04:26:07 +0000 (20:26 -0800)
committer: Samuel Just <sjust@redhat.com>
Sun, 4 Feb 2024 05:00:11 +0000 (21:00 -0800)
Signed-off-by: Samuel Just <sjust@redhat.com>
src/test/crush/crush.cc

index 03bbca97e32ff333b46de3308abea430679f7fa1..2be7d5540d8dcbfb7e7865fd856fa3319a56cd14 100644 (file)
@@ -1103,3 +1103,459 @@ TEST_F(CRUSHTest, straw2_reweight) {
     cout << "     vs " << estddev << std::endl;
   }
 }
+
+// Describes a synthetic test cluster (num_hosts hosts with
+// num_osds_per_host OSDs each) plus the expected shape of a CRUSH
+// mapping produced against it.  Shared by the MSR tests below.
+struct cluster_test_spec_t {
+  const int num_osds_per_host;
+  const int num_hosts;
+
+  // how many distinct hosts a mapping is expected to span
+  const int num_hosts_mapped;
+  // how many OSDs per host a mapping is expected to use
+  const int num_mapped_per_host;
+  // total size of the mapping requested from do_rule
+  const int num_mapped_size;
+
+  // derived: total OSD count in the cluster
+  const int num_osds;
+
+  cluster_test_spec_t(
+    int num_osds_per_host, int num_hosts,
+    int num_hosts_mapped, int num_mapped_per_host, int num_mapped_size)
+    : num_osds_per_host(num_osds_per_host), num_hosts(num_hosts),
+      num_hosts_mapped(num_hosts_mapped),
+      num_mapped_per_host(num_mapped_per_host),
+      num_mapped_size(num_mapped_size),
+      num_osds(num_osds_per_host * num_hosts) {}
+
+  // Record a (non-fatal) gtest failure if osd is out of range.
+  void validate_osd(int osd) const {
+    EXPECT_GE(osd, 0);
+    EXPECT_LT(osd, num_osds);
+  }
+
+  // Predicate form of validate_osd: true iff osd is a valid id.
+  bool check_osd(int osd) const {
+    return osd >= 0 && osd < num_osds;
+  }
+
+  // NOTE(review): unlike validate_osd (non-fatal EXPECT_*), this aborts
+  // the whole test binary via assert on failure — confirm the
+  // inconsistency is intentional.
+  void validate_host(int host) const {
+    assert(host >= 0);
+    assert(host < num_hosts);
+  }
+
+  // [first, end) range of OSD ids living on host; relies on the dense
+  // id layout used by create_crush_heirarchy below.
+  std::pair<int, int> host_to_osd_range(int host) const {
+    validate_host(host);
+    auto first = host * num_osds_per_host;
+    return std::make_pair(first, first + num_osds_per_host);
+  }
+
+  // Inverse of host_to_osd_range: which host an OSD id belongs to.
+  int osd_to_host(int osd) const {
+    validate_osd(osd);
+    return osd / num_osds_per_host;
+  }
+};
+
+// Bucket type ids for the three-level test hierarchy.
+static constexpr int ROOT_TYPE = 2;
+static constexpr int HOST_TYPE = 1;
+static constexpr int OSD_TYPE = 0;
+// Builds a root -> host -> osd CRUSH tree matching spec: one "default"
+// root, spec.num_hosts hosts ("host0".."hostN"), each holding
+// spec.num_osds_per_host OSDs with densely packed ids (host h owns
+// ids [h*per_host, (h+1)*per_host)).  Returns {root bucket id, map}.
+// NOTE(review): "heirarchy" is a typo for "hierarchy"; left as-is here
+// since renaming would touch every caller.
+std::pair<int, std::unique_ptr<CrushWrapper>> create_crush_heirarchy(
+  CephContext *cct,
+  const cluster_test_spec_t &spec)
+{
+  auto c = std::make_unique<CrushWrapper>();
+  c->create();
+  c->set_tunables_optimal();
+
+
+  c->set_type_name(ROOT_TYPE, "root");
+  c->set_type_name(HOST_TYPE, "host");
+  c->set_type_name(OSD_TYPE, "osd");
+
+  int rootno;
+  c->add_bucket(0, CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
+              ROOT_TYPE, 0, nullptr, nullptr, &rootno);
+  c->set_item_name(rootno, "default");
+
+  for (auto host_id = 0; host_id < spec.num_hosts; ++host_id) {
+    const std::string host_name = fmt::format("host{}", host_id);
+    const auto first_host_osd = host_id * spec.num_osds_per_host;
+    const auto next_first_host_osd = first_host_osd + spec.num_osds_per_host;
+    for (auto osd_id = first_host_osd; osd_id < next_first_host_osd; ++osd_id) {
+      const std::string osd_name = fmt::format("osd{}", osd_id);
+      // insert_item creates the host bucket on first use via the
+      // loc map, then attaches the OSD with weight 1.0
+      auto ret = c->insert_item(
+       cct, osd_id, 1.0, osd_name,
+       {{ "root", "default"}, {"host", host_name}});
+      EXPECT_EQ(ret, 0);
+    }
+  }
+
+  c->finalize();
+  return std::make_pair(rootno, std::move(c));
+}
+
+// Weight vector with every OSD fully in (CEPH_OSD_IN), indexed by osd id.
+std::vector<uint32_t> create_weight_vector(
+  const cluster_test_spec_t &spec)
+{
+  return std::vector<uint32_t>(spec.num_osds, CEPH_OSD_IN);
+}
+
+// All-in weight vector except the OSD in the first mapped position
+// (mapping[0]) is marked out — used to exercise single-OSD failure.
+std::vector<uint32_t> create_weight_vector_first_osd_out(
+  const cluster_test_spec_t &spec,
+  const std::vector<int> &mapping)
+{
+  auto weights = create_weight_vector(spec);
+  spec.validate_osd(mapping[0]);
+  weights[mapping[0]] = CEPH_OSD_OUT;
+  return weights;
+}
+
+// All-in weight vector except every OSD on the host containing
+// mapping[0] is marked out — simulates a whole-host failure.
+std::vector<uint32_t> create_weight_vector_first_host_out(
+  const cluster_test_spec_t &spec,
+  const std::vector<int> &mapping)
+{
+  auto weights = create_weight_vector(spec);
+  const auto [first, end] = spec.host_to_osd_range(spec.osd_to_host(mapping[0]));
+  for (auto i = first; i < end; ++i) {
+    weights[i] = CEPH_OSD_OUT;
+  }
+  return weights;
+}
+
+// Expected relationship between a position in the before/after mappings.
+enum class mapping_change_t {
+  SAME,       // identical OSD in both mappings
+  FAILURE,    // position expected to be unmapped (CRUSH_ITEM_NONE)
+  SAME_HOST,  // different OSD, but on the same host
+  NEW_HOST    // different OSD on a different host
+};
+// Checks positions [range.first, range.second) of before/after against
+// expectation, reporting failures via gtest EXPECT_* macros.
+void compare_mappings(
+  const cluster_test_spec_t &spec,
+  const std::vector<int> &before,
+  const std::vector<int> &after,
+  mapping_change_t expectation,
+  const std::pair<int, int> &range)
+{
+  const auto &[begin, end] = range;
+  for (auto i = begin; i < end; ++i) {
+    switch (expectation) {
+    case mapping_change_t::SAME:
+      EXPECT_EQ(before[i], after[i]);
+      break;
+    case mapping_change_t::FAILURE:
+      EXPECT_EQ(CRUSH_ITEM_NONE, after[i]);
+      break;
+    case mapping_change_t::SAME_HOST:
+      EXPECT_NE(before[i], after[i]);
+      // out-of-range after[i] (e.g. CRUSH_ITEM_NONE): report via
+      // validate_osd rather than feeding it to osd_to_host
+      if (!spec.check_osd(after[i])) {
+       spec.validate_osd(after[i]);
+      } else {
+       EXPECT_EQ(spec.osd_to_host(before[i]), spec.osd_to_host(after[i]));
+      }
+      break;
+    case mapping_change_t::NEW_HOST:
+      EXPECT_NE(before[i], after[i]);
+      if (!spec.check_osd(after[i])) {
+       spec.validate_osd(after[i]);
+      } else {
+       EXPECT_NE(spec.osd_to_host(before[i]), spec.osd_to_host(after[i]));
+      }
+      break;
+    }
+  }
+}
+
+// Runs ruleno against weights with a fixed seed (0) and returns the
+// resulting mapping of spec.num_mapped_size positions.
+std::vector<int> get_mapping(
+  const cluster_test_spec_t &spec,
+  CrushWrapper &c,
+  const std::vector<uint32_t> &weights,
+  int ruleno)
+{
+  std::vector<int> out;
+  c.do_rule(
+    ruleno, 0 /* seed */, out, spec.num_mapped_size,
+    weights,
+    0);
+  // NOTE(review): size_t vs int comparison; some compilers warn on
+  // the sign mismatch here
+  EXPECT_EQ(std::size(out), spec.num_mapped_size);
+  return out;
+}
+
+// Number of positions in v that are actually mapped (not
+// CRUSH_ITEM_NONE).  C++20 abbreviated function template: accepts any
+// iterable of ints.
+unsigned count_mapped(const auto &v) {
+  unsigned ret = 0;
+  for (const auto &i : v) ret += (i != CRUSH_ITEM_NONE);
+  return ret;
+}
+
+// MSR rule with two choose steps (3 hosts, then 1 OSD each) over a
+// 4-host / 3-OSDs-per-host cluster; verifies the mapping stays fully
+// populated after a whole-host failure and after a single-OSD failure.
+TEST_F(CRUSHTest, msr_4_host_2_choose_rule) {
+  cluster_test_spec_t spec{3, 4, 3, 1, 3};
+  auto [rootno, c] = create_crush_heirarchy(cct, spec);
+
+  auto ruleno = c->add_rule(-1, 4, CRUSH_RULE_TYPE_MSR_INDEP);
+  EXPECT_EQ(0, c->set_rule_step_take(ruleno, 0, rootno));
+  EXPECT_EQ(
+    0, c->set_rule_step_choose_msr(ruleno, 1, spec.num_hosts_mapped, HOST_TYPE));
+  EXPECT_EQ(
+    0,
+    c->set_rule_step_choose_msr(
+      ruleno, 2, 1, OSD_TYPE));
+  EXPECT_EQ(0, c->set_rule_step_emit(ruleno, 3));
+
+  auto weights_all_in = create_weight_vector(spec);
+  auto before = get_mapping(spec, *c, weights_all_in, ruleno);
+  for (auto i : before) { spec.validate_osd(i); }
+
+  /* MSR test case.  With normal CRUSH, hitting an out osd won't cause
+   * a retry of the previous step, so marking all of the osds on a host
+   * out will not cause positions mapped to that pg to remap.
+   * However, because the above is an MSR rule type, hitting an out osd
+   * will cause a retry of the previous steps as well.
+   * See https://tracker.ceph.com/issues/62214 for the original motivation */
+  auto weights_host_out = create_weight_vector_first_host_out(spec, before);
+  auto after_host_out = get_mapping(spec, *c, weights_host_out, ruleno);
+
+  // dump the crush map and the mappings to aid debugging on failure
+  CrushCompiler cc{*c, std::cout};
+  cc.decompile(std::cout);
+
+  fmt::print("weights_all_in: {}\n", fmt::join(weights_all_in, ", "));
+  fmt::print("weights_host_out: {}\n", fmt::join(weights_host_out, ", "));
+  fmt::print("before        : {}\n", fmt::join(before, ", "));
+  fmt::print("after_host_out: {}\n", fmt::join(after_host_out, ", "));
+
+  // NOTE(review): this lambda duplicates (and shadows) the file-level
+  // count_mapped() helper defined above — it could simply be removed.
+  auto count_mapped = [](const auto &v) {
+    unsigned ret = 0;
+    for (const auto &i : v) ret += (i != CRUSH_ITEM_NONE);
+    return ret;
+  };
+
+  EXPECT_EQ(count_mapped(before), count_mapped(after_host_out));
+
+  auto weights_osd_out = create_weight_vector_first_osd_out(spec, before);
+  auto after_osd_out = get_mapping(spec, *c, weights_osd_out, ruleno);
+  EXPECT_EQ(count_mapped(before), count_mapped(after_osd_out));
+}
+
+// MSR rule choosing 2 hosts x 2 OSDs (mapping size 3) over a 3-host /
+// 2-OSDs-per-host cluster; after failing the first mapped host, the
+// positions on that host must move to a new host while the remaining
+// positions stay put.
+TEST_F(CRUSHTest, msr_2_host_2_osd) {
+  cluster_test_spec_t spec{2, 3, 2, 2, 3};
+  auto [rootno, c] = create_crush_heirarchy(cct, spec);
+
+  auto ruleno = c->add_rule(-1, 4, CRUSH_RULE_TYPE_MSR_INDEP);
+  EXPECT_EQ(0, c->set_rule_step_take(ruleno, 0, rootno));
+  EXPECT_EQ(
+    0, c->set_rule_step_choose_msr(ruleno, 1, spec.num_hosts_mapped, HOST_TYPE));
+  EXPECT_EQ(
+    0,
+    c->set_rule_step_choose_msr(
+      ruleno, 2, spec.num_mapped_per_host, OSD_TYPE));
+  EXPECT_EQ(0, c->set_rule_step_emit(ruleno, 3));
+
+  auto weights_all_in = create_weight_vector(spec);
+  auto before = get_mapping(spec, *c, weights_all_in, ruleno);
+  for (auto i : before) { spec.validate_osd(i); }
+
+  fmt::print("before        : {}\n", fmt::join(before, ", "));
+  ASSERT_EQ(count_mapped(before), 3);
+
+  /* MSR test case.  With normal CRUSH, hitting an out osd won't cause
+   * a retry of the previous step, so marking all of the osds on a host
+   * out will not cause positions mapped to that pg to remap.
+   * However, because the above is an MSR rule type, hitting an out osd
+   * will cause a retry of the previous steps as well.
+   * See https://tracker.ceph.com/issues/62214 for the original motivation */
+  auto weights_host_out = create_weight_vector_first_host_out(spec, before);
+  auto after_host_out = get_mapping(spec, *c, weights_host_out, ruleno);
+
+  // dump the crush map and the mappings to aid debugging on failure
+  CrushCompiler cc{*c, std::cout};
+  cc.decompile(std::cout);
+
+  fmt::print("weights_all_in: {}\n", fmt::join(weights_all_in, ", "));
+  fmt::print("weights_host_out: {}\n", fmt::join(weights_host_out, ", "));
+  fmt::print("before        : {}\n", fmt::join(before, ", "));
+  fmt::print("after_host_out: {}\n", fmt::join(after_host_out, ", "));
+
+  // positions on the failed (first) host remap to a different host...
+  compare_mappings(
+    spec, before, after_host_out, mapping_change_t::NEW_HOST,
+    {0, spec.num_mapped_per_host});
+  // ...while the rest of the mapping is undisturbed
+  compare_mappings(
+    spec, before, after_host_out, mapping_change_t::SAME,
+    {spec.num_mapped_per_host, spec.num_mapped_size});
+}
+
+// MSR rule shaped like an 8+6 EC profile: 4 hosts x 4 OSDs chosen for a
+// 14-position mapping over a 5-host / 4-OSDs-per-host cluster.  As in
+// msr_2_host_2_osd, a whole-host failure must remap only the positions
+// that landed on the failed host.
+TEST_F(CRUSHTest, msr_5_host_8_6_ec_choose) {
+  cluster_test_spec_t spec{4, 5, 4, 4, 14};
+  auto [rootno, c] = create_crush_heirarchy(cct, spec);
+
+  auto ruleno = c->add_rule(-1, 4, CRUSH_RULE_TYPE_MSR_INDEP);
+  unsigned step_id = 0;
+  EXPECT_EQ(0, c->set_rule_step_take(ruleno, step_id++, rootno));
+  EXPECT_EQ(
+    0,
+    c->set_rule_step_choose_msr(
+      ruleno, step_id++, spec.num_hosts_mapped, HOST_TYPE));
+  EXPECT_EQ(
+    0,
+    c->set_rule_step_choose_msr(
+      ruleno, step_id++, spec.num_mapped_per_host, OSD_TYPE));
+  EXPECT_EQ(0, c->set_rule_step_emit(ruleno, step_id++));
+
+  auto weights_all_in = create_weight_vector(spec);
+  auto before = get_mapping(spec, *c, weights_all_in, ruleno);
+  for (auto i : before) { spec.validate_osd(i); }
+
+  /* MSR test case.  With normal CRUSH, hitting an out osd won't cause
+   * a retry of the previous step, so marking all of the osds on a host
+   * out will not cause positions mapped to that pg to remap.
+   * However, because the above is an MSR rule type, hitting an out osd
+   * will cause a retry of the previous steps as well.
+   * See https://tracker.ceph.com/issues/62214 for the original motivation */
+  auto weights_host_out = create_weight_vector_first_host_out(spec, before);
+  auto after_host_out = get_mapping(spec, *c, weights_host_out, ruleno);
+
+  // dump the crush map and the mappings to aid debugging on failure
+  CrushCompiler cc{*c, std::cout};
+  cc.decompile(std::cout);
+
+  fmt::print("weights_all_in: {}\n", fmt::join(weights_all_in, ", "));
+  fmt::print("weights_host_out: {}\n", fmt::join(weights_host_out, ", "));
+  fmt::print("before        : {}\n", fmt::join(before, ", "));
+  fmt::print("after_host_out: {}\n", fmt::join(after_host_out, ", "));
+
+  // positions on the failed (first) host remap to a different host...
+  compare_mappings(
+    spec, before, after_host_out, mapping_change_t::NEW_HOST,
+    {0, spec.num_mapped_per_host});
+  // ...while the rest of the mapping is undisturbed
+  compare_mappings(
+    spec, before, after_host_out, mapping_change_t::SAME,
+    {spec.num_mapped_per_host, spec.num_mapped_size});
+}
+
+// MSR rule spanning two independent roots ("ssd" and "hdd"), each with
+// its own take/choose/choose/emit step sequence in a single 8-step
+// rule.  Picks 2 hosts x 2 OSDs from each root (8 positions total:
+// [0,4) from ssd, [4,8) from hdd) and verifies, across 1000 seeds,
+// that OSD- and host-level failures remap exactly the affected
+// positions.
+TEST_F(CRUSHTest, msr_multi_root) {
+  constexpr unsigned NUM_HOSTS = 4;
+  constexpr unsigned NUM_OSDS_PER_HOST = 3;
+
+  auto c = CrushWrapper();
+  c.create();
+  c.set_tunables_optimal();
+
+  c.set_type_name(ROOT_TYPE, "root");
+  c.set_type_name(HOST_TYPE, "host");
+  c.set_type_name(OSD_TYPE, "osd");
+
+  // reverse lookups populated while building the map, used by the
+  // validation below
+  std::map<int, std::pair<std::string, std::string>> osd_id_to_host_root;
+  std::map<std::string, int> root_name_to_id;
+  std::map<std::string, std::vector<int>> host_name_to_osds;
+  unsigned next_osd_id = 0;
+
+  // Builds one root bucket plus NUM_HOSTS hosts x NUM_OSDS_PER_HOST
+  // OSDs under it; OSD ids are globally sequential across roots.
+  auto populate_root = [&](const auto &root_name) {
+    int rootno;
+    c.add_bucket(0, CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
+                ROOT_TYPE, 0, nullptr, nullptr, &rootno);
+    c.set_item_name(rootno, root_name);
+    root_name_to_id[root_name] = rootno;
+
+    for (unsigned host_id = 0; host_id < NUM_HOSTS; ++host_id) {
+      const std::string host_name =
+       fmt::format("{}-host{}", root_name, host_id);
+      for (unsigned osd = 0; osd < NUM_OSDS_PER_HOST; ++osd) {
+       const int osd_id = next_osd_id++;
+       const std::string osd_name = fmt::format("{}-osd{}", root_name, osd_id);
+       auto ret = c.insert_item(
+         cct, osd_id, 1.0, osd_name,
+         {{ "root", root_name }, { "host", host_name }});
+       osd_id_to_host_root[osd_id] = std::make_pair(host_name, root_name);
+       host_name_to_osds[host_name].push_back(osd_id);
+       EXPECT_EQ(ret, 0);
+      }
+    }
+  };
+
+  // single rule with room for 8 steps (4 per root)
+  int ruleno = 0;
+  int ret = c.add_rule(ruleno, 8, CRUSH_RULE_TYPE_MSR_INDEP);
+  ceph_assert(ret == ruleno);
+
+  // Appends take <root> / choose 2 hosts / choose 2 osds / emit to the
+  // shared rule; step_id persists across calls so the second root's
+  // steps follow the first's.
+  unsigned step_id = 0;
+  auto populate_rule = [&](const auto &rule_name) {
+    ret = c.set_rule_step(
+      ruleno, step_id++, CRUSH_RULE_TAKE, root_name_to_id[rule_name], 0);
+    ceph_assert(ret == 0);
+    ret = c.set_rule_step(
+      ruleno, step_id++, CRUSH_RULE_CHOOSE_MSR, 2, HOST_TYPE);
+    ceph_assert(ret == 0);
+    ret = c.set_rule_step(
+      ruleno, step_id++, CRUSH_RULE_CHOOSE_MSR, 2, OSD_TYPE);
+    ceph_assert(ret == 0);
+    ret = c.set_rule_step(ruleno, step_id++, CRUSH_RULE_EMIT, 0, 0);
+    ceph_assert(ret == 0);
+  };
+
+  for (const auto &root_name : { "ssd", "hdd" }) {
+    populate_root(root_name);
+    populate_rule(root_name);
+  }
+  c.set_rule_name(ruleno, "rule_name");
+  c.finalize();
+
+  constexpr unsigned ACTING_SIZE = 8;
+  constexpr unsigned OSDS_PER_ROOT = 4;
+  constexpr unsigned OSDS_PER_HOST = 2;
+  // Checks structure of a mapping: every position mapped, positions
+  // [0,4) from ssd and [4,8) from hdd, and no host reused across
+  // failure domains (pairs of positions).
+  auto validate_output = [&](const auto &out) {
+    std::set<std::string> hosts;
+    for (unsigned host = 0; host < (ACTING_SIZE / OSDS_PER_HOST); ++host) {
+      std::set<std::string> hosts_this_failure_domain;
+      unsigned start = host * OSDS_PER_HOST;
+      unsigned end = (host + 1) * OSDS_PER_HOST;
+      for (unsigned i = start; i < end; ++i) {
+       EXPECT_NE(out[i], CRUSH_ITEM_NONE);
+       EXPECT_EQ(osd_id_to_host_root.count(out[i]), 1);
+       // NOTE(review): indexes with out[start], not out[i] — every
+       // iteration re-reads the first OSD of the failure domain, so
+       // the per-OSD host/root check is weaker than it appears.
+       // Confirm whether out[i] was intended.
+       const auto &[host_name, root_name] = osd_id_to_host_root[out[start]];
+       EXPECT_EQ(i < OSDS_PER_ROOT ? "ssd" : "hdd", root_name);
+       hosts_this_failure_domain.insert(host_name);
+      }
+      for (const auto &i: hosts_this_failure_domain) {
+       EXPECT_EQ(hosts.count(i), 0);
+       hosts.insert(i);
+      }
+    }
+  };
+
+  const std::vector<uint32_t> all_in(next_osd_id, CEPH_OSD_IN);
+  for (int x = 0; x < 1000; ++x) {
+    std::vector<int> out;
+    c.do_rule(ruleno, x, out, 8, all_in, 0);
+    EXPECT_EQ(count_mapped(out), 8);
+    validate_output(out);
+
+    {
+      // fail one OSD from each root (positions 1 and 5): exactly those
+      // positions must remap, all others must be stable
+      std::vector<uint32_t> osds_out_weight = all_in;
+      std::set<unsigned> osd_idx_out{{1, 5}};
+      for (const auto &i: osd_idx_out) {
+       osds_out_weight[out[i]] = CEPH_OSD_OUT;
+      }
+      std::vector<int> osds_out;
+      c.do_rule(ruleno, x, osds_out, 8, osds_out_weight, 0);
+      EXPECT_EQ(count_mapped(osds_out), 8);
+      validate_output(osds_out);
+      for (unsigned i = 0; i < osds_out.size(); ++i) {
+       if (osd_idx_out.count(i)) {
+         EXPECT_NE(osds_out[i], out[i]);
+       } else {
+         EXPECT_EQ(osds_out[i], out[i]);
+       }
+      }
+    }
+
+    {
+      // fail one whole host from each root (hosts of positions 2 and
+      // 6): positions mapped to those hosts' OSDs must remap, all
+      // others must be stable
+      std::vector<uint32_t> hosts_out_weight = all_in;
+      std::set<unsigned> osd_ids_out;
+
+      for (const auto &i : {2, 6}) {
+       const auto &[host_name, _] = osd_id_to_host_root[out[i]];
+       for (const auto &osd_id: host_name_to_osds[host_name]) {
+         osd_ids_out.insert(osd_id);
+         hosts_out_weight[osd_id] = CEPH_OSD_OUT;
+       }
+      }
+
+      std::vector<int> hosts_out;
+      c.do_rule(ruleno, x, hosts_out, 8, hosts_out_weight, 0);
+      EXPECT_EQ(count_mapped(hosts_out), 8);
+      validate_output(hosts_out);
+      for (unsigned i = 0; i < hosts_out.size(); ++i) {
+       if (osd_ids_out.count(out[i])) {
+         EXPECT_NE(hosts_out[i], out[i]);
+       } else {
+         EXPECT_EQ(hosts_out[i], out[i]);
+       }
+      }
+    }
+  }
+}