From 9dff01ddb66f64cf2153c5875e80ea54618740de Mon Sep 17 00:00:00 2001 From: xie xingguo Date: Wed, 25 Sep 2019 19:36:10 +0800 Subject: [PATCH] osd/OSDMap: do not trust partially simplified pg_upmap_item If we simplified a partially no-op pg_upmap_item, we shall still continue to verify that the remaining part is valid. The bug is introduced by 02e5499b350bcd7d9eac98b2072052a9a4a1f535, before which we always validate the correctness of a pg_upmap_item before trying to cancel or simplify it. Fixes: https://tracker.ceph.com/issues/42052 Signed-off-by: xie xingguo (cherry picked from commit 4196b13283144de966eeba40e6765f10b254dac6) --- src/osd/OSDMap.cc | 80 ++++++++++++++++++----------------- src/test/osd/TestOSDMap.cc | 86 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 39 deletions(-) diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index e32a5bf2caa61..a65d0c500a6fb 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -1809,45 +1809,6 @@ bool OSDMap::check_pg_upmaps( } vector raw, up; pg_to_raw_upmap(pg, &raw, &up); - auto i = pg_upmap.find(pg); - if (i != pg_upmap.end() && raw == i->second) { - ldout(cct, 10) << " removing redundant pg_upmap " - << i->first << " " << i->second - << dendl; - to_cancel->push_back(pg); - continue; - } - auto j = pg_upmap_items.find(pg); - if (j != pg_upmap_items.end()) { - mempool::osdmap::vector> newmap; - for (auto& p : j->second) { - if (std::find(raw.begin(), raw.end(), p.first) == raw.end()) { - // cancel mapping if source osd does not exist anymore - continue; - } - if (p.second != CRUSH_ITEM_NONE && p.second < max_osd && - p.second >= 0 && osd_weight[p.second] == 0) { - // cancel mapping if target osd is out - continue; - } - newmap.push_back(p); - } - if (newmap.empty()) { - ldout(cct, 10) << " removing no-op pg_upmap_items " - << j->first << " " << j->second - << dendl; - to_cancel->push_back(pg); - continue; - } else if (newmap != j->second) { - ldout(cct, 10) << " simplifying partially no-op pg_upmap_items " - << j->first << " " << j->second - << " -> " << newmap - << dendl; - to_remap->insert({pg, newmap}); - any_change = true; - continue; - } - } auto crush_rule = get_pg_pool_crush_rule(pg); auto r = crush->verify_upmap(cct, crush_rule, @@ -1892,6 +1853,47 @@ bool OSDMap::check_pg_upmaps( break; } } + if (!to_cancel->empty() && to_cancel->back() == pg) + continue; + // okay, upmap is valid + // continue to check if it is still necessary + auto i = pg_upmap.find(pg); + if (i != pg_upmap.end() && raw == i->second) { + ldout(cct, 10) << " removing redundant pg_upmap " + << i->first << " " << i->second + << dendl; + to_cancel->push_back(pg); + continue; + } + auto j = pg_upmap_items.find(pg); + if (j != pg_upmap_items.end()) { + mempool::osdmap::vector> newmap; + for (auto& p : j->second) { + if (std::find(raw.begin(), raw.end(), p.first) == raw.end()) { + // cancel mapping if source osd does not exist anymore + continue; + } + if (p.second != CRUSH_ITEM_NONE && p.second < max_osd && + p.second >= 0 && osd_weight[p.second] == 0) { + // cancel mapping if target osd is out + continue; + } + newmap.push_back(p); + } + if (newmap.empty()) { + ldout(cct, 10) << " removing no-op pg_upmap_items " + << j->first << " " << j->second + << dendl; + to_cancel->push_back(pg); + } else if (newmap != j->second) { + ldout(cct, 10) << " simplifying partially no-op pg_upmap_items " + << j->first << " " << j->second + << " -> " << newmap + << dendl; + to_remap->insert({pg, newmap}); + any_change = true; + } + } } any_change = any_change || !to_cancel->empty(); return any_change; diff --git a/src/test/osd/TestOSDMap.cc b/src/test/osd/TestOSDMap.cc index 2bd873fd01a61..0ecae54fbc3cb 100644 --- a/src/test/osd/TestOSDMap.cc +++ b/src/test/osd/TestOSDMap.cc @@ -1397,6 +1397,92 @@ TEST_F(OSDMapTest, BUG_40104) { } } +TEST_F(OSDMapTest, BUG_42052) { + // https://tracker.ceph.com/issues/42052 + set_up_map(6, true); + const string pool_name("pool"); + // build customized crush rule for "pool" + CrushWrapper crush; + get_crush(osdmap, crush); + string rule_name = "rule"; + int rule_type = pg_pool_t::TYPE_REPLICATED; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno)) + break; + } + int min_size = 3; + int max_size = 3; + int steps = 8; + crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + // always choose osd.0, osd.1, osd.2 + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 1); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 2); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + osdmap.apply_incremental(pending_inc); + } + + // create "pool" + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pool_max = osdmap.get_pool_max(); + auto pool_id = ++pending_inc.new_pool_max; + pg_pool_t empty; + auto p = pending_inc.get_new_pool(pool_id, &empty); + p->size = 3; + p->min_size = 1; + p->set_pg_num(1); + p->set_pgp_num(1); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + pending_inc.new_pool_names[pool_id] = pool_name; + osdmap.apply_incremental(pending_inc); + ASSERT_TRUE(osdmap.have_pg_pool(pool_id)); + ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_name); + pg_t rawpg(0, pool_id); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + { + // pg_upmap 1.0 [2,3,5] + vector new_up{2,3,5}; + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector( + new_up.begin(), new_up.end()); + osdmap.apply_incremental(pending_inc); + } + { + // pg_upmap_items 1.0 [0,3,4,5] + vector> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(0, 3)); + new_pg_upmap_items.push_back(make_pair(4, 5)); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + osdmap.apply_incremental(pending_inc); + } + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); + osdmap.apply_incremental(pending_inc); + ASSERT_FALSE(osdmap.have_pg_upmaps(pgid)); + } +} + TEST(PGTempMap, basic) { PGTempMap m; -- 2.39.5