From: David Zafman Date: Thu, 5 Dec 2019 00:30:00 +0000 (-0800) Subject: Revert "crush: remove invalid upmap items" X-Git-Tag: v12.2.13~20^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=6461a411b766622060e5df0433dc3ce79eb1889f;p=ceph.git Revert "crush: remove invalid upmap items" This reverts commit 09528ac4663cda9f206eb8d4dcc15926b8e4586f. Signed-off-by: David Zafman --- diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index db6628b4b6d38..55d627b047496 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -891,27 +891,16 @@ int CrushWrapper::verify_upmap(CephContext *cct, << dendl; return -ENOENT; } - int root_bucket = 0; - int cursor = 0; for (unsigned step = 0; step < rule->len; ++step) { auto curstep = &rule->steps[step]; ldout(cct, 10) << __func__ << " step " << step << dendl; switch (curstep->op) { - case CRUSH_RULE_TAKE: - { - root_bucket = curstep->arg1; - } - break; case CRUSH_RULE_CHOOSELEAF_FIRSTN: case CRUSH_RULE_CHOOSELEAF_INDEP: { - int numrep = curstep->arg1; int type = curstep->arg2; - int real_rep = 0; if (type == 0) // osd break; - if (numrep <= 0) - numrep += pool_size; map> osds_by_parent; // parent_of_desired_type -> osds for (auto osd : up) { auto parent = get_parent_of_type(osd, type, rule_id); @@ -923,8 +912,6 @@ int CrushWrapper::verify_upmap(CephContext *cct, << dendl; } } - ldout(cct, 10) << __func__ << " osds_by_parent " << osds_by_parent << dendl; - ceph_assert(root_bucket < 0); for (auto i : osds_by_parent) { if (i.second.size() > 1) { lderr(cct) << __func__ << " multiple osds " << i.second @@ -932,22 +919,6 @@ int CrushWrapper::verify_upmap(CephContext *cct, << dendl; return -EINVAL; } - if (subtree_contains(root_bucket, i.first)) { - real_rep++; - } - } - if (real_rep != numrep) { - lderr(cct) << __func__ << " expected " << numrep << " items in bucket " << root_bucket - << " real " << real_rep << dendl; - return -EINVAL; - } - // validate the osd's in subtree - for (int c = 0; cursor < up.size() && c < numrep; ++cursor, ++c) { - int osd = up[cursor]; - if (!subtree_contains(root_bucket, osd)) { - lderr(cct) << __func__ << " osd " << osd << " not in bucket " << root_bucket << dendl; - return -EINVAL; - } } } break; @@ -957,7 +928,6 @@ int CrushWrapper::verify_upmap(CephContext *cct, { int numrep = curstep->arg1; int type = curstep->arg2; - int real_rep = 0; if (type == 0) // osd break; if (numrep <= 0) @@ -979,17 +949,6 @@ int CrushWrapper::verify_upmap(CephContext *cct, << dendl; return -EINVAL; } - ceph_assert(root_bucket < 0); - for (auto & i : parents_of_type) { - if (subtree_contains(root_bucket, i)) { - real_rep++; - } - } - if (real_rep != numrep) { - lderr(cct) << __func__ << " expected " << numrep << " items in bucket " << root_bucket - << " real " << real_rep << dendl; - return -EINVAL; - } } break; diff --git a/src/test/osd/TestOSDMap.cc b/src/test/osd/TestOSDMap.cc index af7e2336bfb77..42c1bc590d579 100644 --- a/src/test/osd/TestOSDMap.cc +++ b/src/test/osd/TestOSDMap.cc @@ -1447,223 +1447,6 @@ TEST_F(OSDMapTest, BUG_42052) { } } -TEST_F(OSDMapTest, CleanPGUpmapsFixLeakedUpmaps) { - set_up_map(60); - { - // build a temporary crush topology of 2datacenters, 3racks per dc, - // 1host per rack, 10osds per host - OSDMap tmp; // use a tmpmap here, so we do not dirty origin map.. - tmp.deepish_copy_from(osdmap); - const int expected_host_num = 6; - int osd_per_host = (int)get_num_osds() / expected_host_num; - ASSERT_GE(osd_per_host, 10); - int host_per_dc = 3; - int index = 0; - int dc_index = 0; - for (int i = 0; i < (int)get_num_osds(); i++) { - if (i && i % osd_per_host == 0) { - ++index; - } - if (i && i % (host_per_dc * osd_per_host) == 0) { - ++dc_index; - } - stringstream osd_name; - stringstream host_name; - stringstream rack_name; - stringstream dc_name; - vector move_to; - osd_name << "osd." << i; - host_name << "host-" << index; - rack_name << "rack-" << index; - dc_name << "dc-" << dc_index; - move_to.push_back("root=default"); - string dc_loc = "datacenter=" + dc_name.str(); - move_to.push_back(dc_loc); - string rack_loc = "rack=" + rack_name.str(); - move_to.push_back(rack_loc); - string host_loc = "host=" + host_name.str(); - move_to.push_back(host_loc); - auto r = crush_move(tmp, osd_name.str(), move_to); - ASSERT_EQ(0, r); - } - - // build crush rule - CrushWrapper crush; - get_crush(tmp, crush); - string rule_name = "rule_xeus_993_1"; - int rule_type = pg_pool_t::TYPE_REPLICATED; - ASSERT_TRUE(!crush.rule_exists(rule_name)); - int rno; - for (rno = 0; rno < crush.get_max_rules(); rno++) { - if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno)) - break; - } - string root_name = "default"; - string dc_1 = "dc-0"; - int dc1 = crush.get_item_id(dc_1); - string dc_2 = "dc-1"; - int dc2 = crush.get_item_id(dc_2); - int min_size = 1; - int max_size = 20; - int steps = 8; - crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size); - int step = 0; - crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); - crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); - crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc1, 0); - crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */); - crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); - crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc2, 0); - crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */); - crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); - ASSERT_TRUE(step == steps); - auto r = crush_add_rule(crush.get_crush_map(), rule, rno); - ASSERT_TRUE(r >= 0); - crush.set_rule_name(rno, rule_name); - { - OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); - pending_inc.crush.clear(); - crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); - tmp.apply_incremental(pending_inc); - } - int64_t pool_xeus_993; - { - OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1); - new_pool_inc.new_pool_max = tmp.get_pool_max(); - new_pool_inc.fsid = tmp.get_fsid(); - pg_pool_t empty; - pool_xeus_993 = ++new_pool_inc.new_pool_max; - pg_pool_t *p = new_pool_inc.get_new_pool(pool_xeus_993, &empty); - p->size = 4; - p->set_pg_num(4096); - p->set_pgp_num(4096); - p->type = pg_pool_t::TYPE_REPLICATED; - p->crush_rule = rno; - p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); - new_pool_inc.new_pool_names[pool_xeus_993] = "pool_xeus_993"; - tmp.apply_incremental(new_pool_inc); - } - - pg_t rep_pg(0, pool_xeus_993); - pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg); - { - int from = -1; - int to = -1; - vector rep_up; - int rep_up_primary; - tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary); - std::cout << "pgid " << rep_up << " up " << rep_up << std::endl; - ASSERT_TRUE(rep_up.size() == 4); - from = *(rep_up.begin()); - ASSERT_TRUE(from >= 0); - auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno); - if (dc_parent == dc1) - dc_parent = dc2; - else - dc_parent = dc1; - auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno); - ASSERT_TRUE(dc_parent < 0); - ASSERT_TRUE(rack_parent < 0); - set rack_parents; - for (auto &i: rep_up) { - if (i == from) continue; - auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); - rack_parents.insert(rack_parent); - } - for (int i = 0; i < (int)get_num_osds(); i++) { - if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) { - auto dc_p = tmp.crush->get_parent_of_type(i, 8 /* dc */, rno); - auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); - if (dc_p == dc_parent && - rack_parents.find(rack_p) == rack_parents.end()) { - to = i; - break; - } - } - } - ASSERT_TRUE(to >= 0); - ASSERT_TRUE(from != to); - std::cout << "from " << from << " to " << to << std::endl; - vector> new_pg_upmap_items; - new_pg_upmap_items.push_back(make_pair(from, to)); - OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); - pending_inc.new_pg_upmap_items[rep_pgid] = - mempool::osdmap::vector>( - new_pg_upmap_items.begin(), new_pg_upmap_items.end()); - tmp.apply_incremental(pending_inc); - ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); - } - pg_t rep_pg2(2, pool_xeus_993); - pg_t rep_pgid2 = tmp.raw_pg_to_pg(rep_pg2); - { - pg_t rep_pgid = rep_pgid2; - vector from_osds{-1, -1}; - vector rep_up; - int rep_up_primary; - tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary); - ASSERT_TRUE(rep_up.size() == 4); - from_osds[0] = *(rep_up.begin()); - from_osds[1] = *(rep_up.rbegin()); - std::cout << "pgid " << rep_pgid2 << " up " << rep_up << std::endl; - ASSERT_TRUE(*(from_osds.begin()) >= 0); - ASSERT_TRUE(*(from_osds.rbegin()) >= 0); - vector> new_pg_upmap_items; - for (auto &from: from_osds) { - int to = -1; - auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno); - if (dc_parent == dc1) - dc_parent = dc2; - else - dc_parent = dc1; - auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno); - ASSERT_TRUE(dc_parent < 0); - ASSERT_TRUE(rack_parent < 0); - set rack_parents; - for (auto &i: rep_up) { - if (i == from) continue; - auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); - rack_parents.insert(rack_parent); - } - for (auto &i: new_pg_upmap_items) { - auto rack_from = tmp.crush->get_parent_of_type(i.first, 3, rno); - auto rack_to = tmp.crush->get_parent_of_type(i.second, 3, rno); - rack_parents.insert(rack_from); - rack_parents.insert(rack_to); - } - for (int i = 0; i < (int)get_num_osds(); i++) { - if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) { - auto dc_p = tmp.crush->get_parent_of_type(i, 8 /* dc */, rno); - auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); - if (dc_p == dc_parent && - rack_parents.find(rack_p) == rack_parents.end()) { - to = i; - break; - } - } - } - ASSERT_TRUE(to >= 0); - ASSERT_TRUE(from != to); - std::cout << "from " << from << " to " << to << std::endl; - new_pg_upmap_items.push_back(make_pair(from, to)); - } - OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); - pending_inc.new_pg_upmap_items[rep_pgid] = - mempool::osdmap::vector>( - new_pg_upmap_items.begin(), new_pg_upmap_items.end()); - tmp.apply_incremental(pending_inc); - ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); - } - { - // *maybe_remove_pg_upmaps* should remove the above upmap_item - OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); - clean_pg_upmaps(g_ceph_context, tmp, pending_inc); - tmp.apply_incremental(pending_inc); - ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid)); - ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid2)); - } - } -} - TEST(PGTempMap, basic) { PGTempMap m;