From: Samuel Just
Date: Fri, 17 Nov 2023 04:26:07 +0000 (-0800)
Subject: test/crush/crush.cc: add tests specifically for MSR
X-Git-Tag: v19.1.0~339^2~8
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=4b4eb17d328612284af22870908ca93de869a911;p=ceph.git

test/crush/crush.cc: add tests specifically for MSR

Signed-off-by: Samuel Just
---

diff --git a/src/test/crush/crush.cc b/src/test/crush/crush.cc
index 03bbca97e32ff..2be7d5540d8dc 100644
--- a/src/test/crush/crush.cc
+++ b/src/test/crush/crush.cc
@@ -1103,3 +1103,459 @@ TEST_F(CRUSHTest, straw2_reweight) {
     cout << " vs " << estddev << std::endl;
   }
 }
+
+struct cluster_test_spec_t {
+  const int num_osds_per_host;
+  const int num_hosts;
+
+  const int num_hosts_mapped;
+  const int num_mapped_per_host;
+  const int num_mapped_size;
+
+  const int num_osds;
+
+  cluster_test_spec_t(
+    int num_osds_per_host, int num_hosts,
+    int num_hosts_mapped, int num_mapped_per_host, int num_mapped_size)
+    : num_osds_per_host(num_osds_per_host), num_hosts(num_hosts),
+      num_hosts_mapped(num_hosts_mapped),
+      num_mapped_per_host(num_mapped_per_host),
+      num_mapped_size(num_mapped_size),
+      num_osds(num_osds_per_host * num_hosts) {}
+
+  void validate_osd(int osd) const {
+    EXPECT_GE(osd, 0);
+    EXPECT_LT(osd, num_osds);
+  }
+
+  bool check_osd(int osd) const {
+    return osd >= 0 && osd < num_osds;
+  }
+
+  void validate_host(int host) const {
+    assert(host >= 0);
+    assert(host < num_hosts);
+  }
+
+  std::pair<int, int> host_to_osd_range(int host) const {
+    validate_host(host);
+    auto first = host * num_osds_per_host;
+    return std::make_pair(first, first + num_osds_per_host);
+  }
+
+  int osd_to_host(int osd) const {
+    validate_osd(osd);
+    return osd / num_osds_per_host;
+  }
+};
+
+static constexpr int ROOT_TYPE = 2;
+static constexpr int HOST_TYPE = 1;
+static constexpr int OSD_TYPE = 0;
+std::pair<int, std::unique_ptr<CrushWrapper>> create_crush_heirarchy(
+  CephContext *cct,
+  const cluster_test_spec_t &spec)
+{
+  auto c = std::make_unique<CrushWrapper>();
+  c->create();
+  c->set_tunables_optimal();
+
+
+  c->set_type_name(ROOT_TYPE, "root");
+  c->set_type_name(HOST_TYPE, "host");
+  c->set_type_name(OSD_TYPE, "osd");
+
+  int rootno;
+  c->add_bucket(0, CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
+                ROOT_TYPE, 0, nullptr, nullptr, &rootno);
+  c->set_item_name(rootno, "default");
+
+  for (auto host_id = 0; host_id < spec.num_hosts; ++host_id) {
+    const std::string host_name = fmt::format("host{}", host_id);
+    const auto first_host_osd = host_id * spec.num_osds_per_host;
+    const auto next_first_host_osd = first_host_osd + spec.num_osds_per_host;
+    for (auto osd_id = first_host_osd; osd_id < next_first_host_osd; ++osd_id) {
+      const std::string osd_name = fmt::format("osd{}", osd_id);
+      auto ret = c->insert_item(
+        cct, osd_id, 1.0, osd_name,
+        {{ "root", "default"}, {"host", host_name}});
+      EXPECT_EQ(ret, 0);
+    }
+  }
+
+  c->finalize();
+  return std::make_pair(rootno, std::move(c));
+}
+
+std::vector<uint32_t> create_weight_vector(
+  const cluster_test_spec_t &spec)
+{
+  return std::vector<uint32_t>(spec.num_osds, CEPH_OSD_IN);
+}
+
+std::vector<uint32_t> create_weight_vector_first_osd_out(
+  const cluster_test_spec_t &spec,
+  const std::vector<int> &mapping)
+{
+  auto weights = create_weight_vector(spec);
+  spec.validate_osd(mapping[0]);
+  weights[mapping[0]] = CEPH_OSD_OUT;
+  return weights;
+}
+
+std::vector<uint32_t> create_weight_vector_first_host_out(
+  const cluster_test_spec_t &spec,
+  const std::vector<int> &mapping)
+{
+  auto weights = create_weight_vector(spec);
+  const auto [first, end] = spec.host_to_osd_range(spec.osd_to_host(mapping[0]));
+  for (auto i = first; i < end; ++i) {
+    weights[i] = CEPH_OSD_OUT;
+  }
+  return weights;
+}
+
+enum class mapping_change_t {
+  SAME,
+  FAILURE,
+  SAME_HOST,
+  NEW_HOST
+};
+void compare_mappings(
+  const cluster_test_spec_t &spec,
+  const std::vector<int> &before,
+  const std::vector<int> &after,
+  mapping_change_t expectation,
+  const std::pair<int, int> &range)
+{
+  const auto &[begin, end] = range;
+  for (auto i = begin; i < end; ++i) {
+    switch (expectation) {
+    case mapping_change_t::SAME:
+      EXPECT_EQ(before[i], after[i]);
+      break;
+    case mapping_change_t::FAILURE:
+      EXPECT_EQ(CRUSH_ITEM_NONE, after[i]);
+      break;
+    case mapping_change_t::SAME_HOST:
+      EXPECT_NE(before[i], after[i]);
+      if (!spec.check_osd(after[i])) {
+        spec.validate_osd(after[i]);
+      } else {
+        EXPECT_EQ(spec.osd_to_host(before[i]), spec.osd_to_host(after[i]));
+      }
+      break;
+    case mapping_change_t::NEW_HOST:
+      EXPECT_NE(before[i], after[i]);
+      if (!spec.check_osd(after[i])) {
+        spec.validate_osd(after[i]);
+      } else {
+        EXPECT_NE(spec.osd_to_host(before[i]), spec.osd_to_host(after[i]));
+      }
+      break;
+    }
+  }
+}
+
+std::vector<int> get_mapping(
+  const cluster_test_spec_t &spec,
+  CrushWrapper &c,
+  const std::vector<uint32_t> &weights,
+  int ruleno)
+{
+  std::vector<int> out;
+  c.do_rule(
+    ruleno, 0 /* seed */, out, spec.num_mapped_size,
+    weights,
+    0);
+  EXPECT_EQ(std::size(out), spec.num_mapped_size);
+  return out;
+}
+
+unsigned count_mapped(const auto &v) {
+  unsigned ret = 0;
+  for (const auto &i : v) ret += (i != CRUSH_ITEM_NONE);
+  return ret;
+}
+
+TEST_F(CRUSHTest, msr_4_host_2_choose_rule) {
+  cluster_test_spec_t spec{3, 4, 3, 1, 3};
+  auto [rootno, c] = create_crush_heirarchy(cct, spec);
+
+  auto ruleno = c->add_rule(-1, 4, CRUSH_RULE_TYPE_MSR_INDEP);
+  EXPECT_EQ(0, c->set_rule_step_take(ruleno, 0, rootno));
+  EXPECT_EQ(
+    0, c->set_rule_step_choose_msr(ruleno, 1, spec.num_hosts_mapped, HOST_TYPE));
+  EXPECT_EQ(
+    0,
+    c->set_rule_step_choose_msr(
+      ruleno, 2, 1, OSD_TYPE));
+  EXPECT_EQ(0, c->set_rule_step_emit(ruleno, 3));
+
+  auto weights_all_in = create_weight_vector(spec);
+  auto before = get_mapping(spec, *c, weights_all_in, ruleno);
+  for (auto i : before) { spec.validate_osd(i); }
+
+  /* MSR test case. With normal CRUSH, hitting an out osd won't cause
+   * a retry of the previous step, so marking all of the osds on a host
+   * out will not cause positions mapped to that pg to remap.
+   * However, because the above is an MSR rule type, hitting an out osd
+   * will cause a retry of the previous steps as well.
+   * See https://tracker.ceph.com/issues/62214 for the original motivation */
+  auto weights_host_out = create_weight_vector_first_host_out(spec, before);
+  auto after_host_out = get_mapping(spec, *c, weights_host_out, ruleno);
+
+  CrushCompiler cc{*c, std::cout};
+  cc.decompile(std::cout);
+
+  fmt::print("weights_all_in: {}\n", fmt::join(weights_all_in, ", "));
+  fmt::print("weights_host_out: {}\n", fmt::join(weights_host_out, ", "));
+  fmt::print("before : {}\n", fmt::join(before, ", "));
+  fmt::print("after_host_out: {}\n", fmt::join(after_host_out, ", "));
+
+  auto count_mapped = [](const auto &v) {
+    unsigned ret = 0;
+    for (const auto &i : v) ret += (i != CRUSH_ITEM_NONE);
+    return ret;
+  };
+
+  EXPECT_EQ(count_mapped(before), count_mapped(after_host_out));
+
+  auto weights_osd_out = create_weight_vector_first_osd_out(spec, before);
+  auto after_osd_out = get_mapping(spec, *c, weights_osd_out, ruleno);
+  EXPECT_EQ(count_mapped(before), count_mapped(after_osd_out));
+}
+
+TEST_F(CRUSHTest, msr_2_host_2_osd) {
+  cluster_test_spec_t spec{2, 3, 2, 2, 3};
+  auto [rootno, c] = create_crush_heirarchy(cct, spec);
+
+  auto ruleno = c->add_rule(-1, 4, CRUSH_RULE_TYPE_MSR_INDEP);
+  EXPECT_EQ(0, c->set_rule_step_take(ruleno, 0, rootno));
+  EXPECT_EQ(
+    0, c->set_rule_step_choose_msr(ruleno, 1, spec.num_hosts_mapped, HOST_TYPE));
+  EXPECT_EQ(
+    0,
+    c->set_rule_step_choose_msr(
+      ruleno, 2, spec.num_mapped_per_host, OSD_TYPE));
+  EXPECT_EQ(0, c->set_rule_step_emit(ruleno, 3));
+
+  auto weights_all_in = create_weight_vector(spec);
+  auto before = get_mapping(spec, *c, weights_all_in, ruleno);
+  for (auto i : before) { spec.validate_osd(i); }
+
+  fmt::print("before : {}\n", fmt::join(before, ", "));
+  ASSERT_EQ(count_mapped(before), 3);
+
+  /* MSR test case. With normal CRUSH, hitting an out osd won't cause
+   * a retry of the previous step, so marking all of the osds on a host
+   * out will not cause positions mapped to that pg to remap.
+   * However, because the above is an MSR rule type, hitting an out osd
+   * will cause a retry of the previous steps as well.
+   * See https://tracker.ceph.com/issues/62214 for the original motivation */
+  auto weights_host_out = create_weight_vector_first_host_out(spec, before);
+  auto after_host_out = get_mapping(spec, *c, weights_host_out, ruleno);
+
+  CrushCompiler cc{*c, std::cout};
+  cc.decompile(std::cout);
+
+  fmt::print("weights_all_in: {}\n", fmt::join(weights_all_in, ", "));
+  fmt::print("weights_host_out: {}\n", fmt::join(weights_host_out, ", "));
+  fmt::print("before : {}\n", fmt::join(before, ", "));
+  fmt::print("after_host_out: {}\n", fmt::join(after_host_out, ", "));
+
+  compare_mappings(
+    spec, before, after_host_out, mapping_change_t::NEW_HOST,
+    {0, spec.num_mapped_per_host});
+  compare_mappings(
+    spec, before, after_host_out, mapping_change_t::SAME,
+    {spec.num_mapped_per_host, spec.num_mapped_size});
+}
+
+TEST_F(CRUSHTest, msr_5_host_8_6_ec_choose) {
+  cluster_test_spec_t spec{4, 5, 4, 4, 14};
+  auto [rootno, c] = create_crush_heirarchy(cct, spec);
+
+  auto ruleno = c->add_rule(-1, 4, CRUSH_RULE_TYPE_MSR_INDEP);
+  unsigned step_id = 0;
+  EXPECT_EQ(0, c->set_rule_step_take(ruleno, step_id++, rootno));
+  EXPECT_EQ(
+    0,
+    c->set_rule_step_choose_msr(
+      ruleno, step_id++, spec.num_hosts_mapped, HOST_TYPE));
+  EXPECT_EQ(
+    0,
+    c->set_rule_step_choose_msr(
+      ruleno, step_id++, spec.num_mapped_per_host, OSD_TYPE));
+  EXPECT_EQ(0, c->set_rule_step_emit(ruleno, step_id++));
+
+  auto weights_all_in = create_weight_vector(spec);
+  auto before = get_mapping(spec, *c, weights_all_in, ruleno);
+  for (auto i : before) { spec.validate_osd(i); }
+
+  /* MSR test case. With normal CRUSH, hitting an out osd won't cause
+   * a retry of the previous step, so marking all of the osds on a host
+   * out will not cause positions mapped to that pg to remap.
+   * However, because the above is an MSR rule type, hitting an out osd
+   * will cause a retry of the previous steps as well.
+   * See https://tracker.ceph.com/issues/62214 for the original motivation */
+  auto weights_host_out = create_weight_vector_first_host_out(spec, before);
+  auto after_host_out = get_mapping(spec, *c, weights_host_out, ruleno);
+
+  CrushCompiler cc{*c, std::cout};
+  cc.decompile(std::cout);
+
+  fmt::print("weights_all_in: {}\n", fmt::join(weights_all_in, ", "));
+  fmt::print("weights_host_out: {}\n", fmt::join(weights_host_out, ", "));
+  fmt::print("before : {}\n", fmt::join(before, ", "));
+  fmt::print("after_host_out: {}\n", fmt::join(after_host_out, ", "));
+
+  compare_mappings(
+    spec, before, after_host_out, mapping_change_t::NEW_HOST,
+    {0, spec.num_mapped_per_host});
+  compare_mappings(
+    spec, before, after_host_out, mapping_change_t::SAME,
+    {spec.num_mapped_per_host, spec.num_mapped_size});
+}
+
+TEST_F(CRUSHTest, msr_multi_root) {
+  constexpr unsigned NUM_HOSTS = 4;
+  constexpr unsigned NUM_OSDS_PER_HOST = 3;
+
+  auto c = CrushWrapper();
+  c.create();
+  c.set_tunables_optimal();
+
+  c.set_type_name(ROOT_TYPE, "root");
+  c.set_type_name(HOST_TYPE, "host");
+  c.set_type_name(OSD_TYPE, "osd");
+
+  std::map<int, std::pair<std::string, std::string>> osd_id_to_host_root;
+  std::map<std::string, int> root_name_to_id;
+  std::map<std::string, std::vector<int>> host_name_to_osds;
+  unsigned next_osd_id = 0;
+
+  auto populate_root = [&](const auto &root_name) {
+    int rootno;
+    c.add_bucket(0, CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
+                 ROOT_TYPE, 0, nullptr, nullptr, &rootno);
+    c.set_item_name(rootno, root_name);
+    root_name_to_id[root_name] = rootno;
+
+    for (unsigned host_id = 0; host_id < NUM_HOSTS; ++host_id) {
+      const std::string host_name =
+        fmt::format("{}-host{}", root_name, host_id);
+      for (unsigned osd = 0; osd < NUM_OSDS_PER_HOST; ++osd) {
+        const int osd_id = next_osd_id++;
+        const std::string osd_name = fmt::format("{}-osd{}", root_name, osd_id);
+        auto ret = c.insert_item(
+          cct, osd_id, 1.0, osd_name,
+          {{ "root", root_name }, { "host", host_name }});
+        osd_id_to_host_root[osd_id] = std::make_pair(host_name, root_name);
+        host_name_to_osds[host_name].push_back(osd_id);
+        EXPECT_EQ(ret, 0);
+      }
+    }
+  };
+
+  int ruleno = 0;
+  int ret = c.add_rule(ruleno, 8, CRUSH_RULE_TYPE_MSR_INDEP);
+  ceph_assert(ret == ruleno);
+
+  unsigned step_id = 0;
+  auto populate_rule = [&](const auto &rule_name) {
+    ret = c.set_rule_step(
+      ruleno, step_id++, CRUSH_RULE_TAKE, root_name_to_id[rule_name], 0);
+    ceph_assert(ret == 0);
+    ret = c.set_rule_step(
+      ruleno, step_id++, CRUSH_RULE_CHOOSE_MSR, 2, HOST_TYPE);
+    ceph_assert(ret == 0);
+    ret = c.set_rule_step(
+      ruleno, step_id++, CRUSH_RULE_CHOOSE_MSR, 2, OSD_TYPE);
+    ceph_assert(ret == 0);
+    ret = c.set_rule_step(ruleno, step_id++, CRUSH_RULE_EMIT, 0, 0);
+    ceph_assert(ret == 0);
+  };
+
+  for (const auto &root_name : { "ssd", "hdd" }) {
+    populate_root(root_name);
+    populate_rule(root_name);
+  }
+  c.set_rule_name(ruleno, "rule_name");
+  c.finalize();
+
+  constexpr unsigned ACTING_SIZE = 8;
+  constexpr unsigned OSDS_PER_ROOT = 4;
+  constexpr unsigned OSDS_PER_HOST = 2;
+  auto validate_output = [&](const auto &out) {
+    std::set<std::string> hosts;
+    for (unsigned host = 0; host < (ACTING_SIZE / OSDS_PER_HOST); ++host) {
+      std::set<std::string> hosts_this_failure_domain;
+      unsigned start = host * OSDS_PER_HOST;
+      unsigned end = (host + 1) * OSDS_PER_HOST;
+      for (unsigned i = start; i < end; ++i) {
+        EXPECT_NE(out[i], CRUSH_ITEM_NONE);
+        EXPECT_EQ(osd_id_to_host_root.count(out[i]), 1);
+        const auto &[host_name, root_name] = osd_id_to_host_root[out[start]];
+        EXPECT_EQ(i < OSDS_PER_ROOT ? "ssd" : "hdd", root_name);
+        hosts_this_failure_domain.insert(host_name);
+      }
+      for (const auto &i: hosts_this_failure_domain) {
+        EXPECT_EQ(hosts.count(i), 0);
+        hosts.insert(i);
+      }
+    }
+  };
+
+  const std::vector<uint32_t> all_in(next_osd_id, CEPH_OSD_IN);
+  for (int x = 0; x < 1000; ++x) {
+    std::vector<int> out;
+    c.do_rule(ruleno, x, out, 8, all_in, 0);
+    EXPECT_EQ(count_mapped(out), 8);
+    validate_output(out);
+
+    {
+      std::vector<uint32_t> osds_out_weight = all_in;
+      std::set<unsigned> osd_idx_out{{1, 5}};
+      for (const auto &i: osd_idx_out) {
+        osds_out_weight[out[i]] = CEPH_OSD_OUT;
+      }
+      std::vector<int> osds_out;
+      c.do_rule(ruleno, x, osds_out, 8, osds_out_weight, 0);
+      EXPECT_EQ(count_mapped(osds_out), 8);
+      validate_output(osds_out);
+      for (unsigned i = 0; i < osds_out.size(); ++i) {
+        if (osd_idx_out.count(i)) {
+          EXPECT_NE(osds_out[i], out[i]);
+        } else {
+          EXPECT_EQ(osds_out[i], out[i]);
+        }
+      }
+    }
+
+    {
+      std::vector<uint32_t> hosts_out_weight = all_in;
+      std::set<int> osd_ids_out;
+
+      for (const auto &i : {2, 6}) {
+        const auto &[host_name, _] = osd_id_to_host_root[out[i]];
+        for (const auto &osd_id: host_name_to_osds[host_name]) {
+          osd_ids_out.insert(osd_id);
+          hosts_out_weight[osd_id] = CEPH_OSD_OUT;
+        }
+      }
+
+      std::vector<int> hosts_out;
+      c.do_rule(ruleno, x, hosts_out, 8, hosts_out_weight, 0);
+      EXPECT_EQ(count_mapped(hosts_out), 8);
+      validate_output(hosts_out);
+      for (unsigned i = 0; i < hosts_out.size(); ++i) {
+        if (osd_ids_out.count(out[i])) {
+          EXPECT_NE(hosts_out[i], out[i]);
+        } else {
+          EXPECT_EQ(hosts_out[i], out[i]);
+        }
+      }
+    }
+  }
+}