From: Sage Weil Date: Wed, 15 Mar 2017 16:46:25 +0000 (-0400) Subject: crush: implement try_remap_rule X-Git-Tag: v12.0.2~280^2~6 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=06ec9d41ebc6db434ccf832713863e14ae5089b2;p=ceph-ci.git crush: implement try_remap_rule Simulate a CRUSH mapping but try to identify alternative OSD choices (based on an underfull list and overfull set) that still respect the CRUSH rule constraints. Signed-off-by: Sage Weil --- diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index 940296a8b6d..8dd606f0d9d 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -1993,3 +1993,207 @@ bool CrushWrapper::is_valid_crush_loc(CephContext *cct, } return true; } + +int CrushWrapper::_choose_type_stack( + CephContext *cct, + const vector>& stack, + const set& overfull, + const vector& underfull, + const vector& orig, + vector::const_iterator& i, + set& used, + vector *pw) const +{ + vector w = *pw; + vector o; + + ldout(cct, 10) << __func__ << " stack " << stack + << " orig " << orig + << " at " << *i + << " pw " << *pw + << dendl; + + vector cumulative_fanout(stack.size()); + int f = 1; + for (int j = (int)stack.size() - 1; j >= 0; --j) { + cumulative_fanout[j] = f; + f *= stack[j].second; + } + ldout(cct, 10) << __func__ << " cumulative_fanout " << cumulative_fanout + << dendl; + + for (unsigned j = 0; j < stack.size(); ++j) { + int type = stack[j].first; + int fanout = stack[j].second; + int cum_fanout = cumulative_fanout[j]; + ldout(cct, 10) << " level " << j << ": type " << type << " fanout " << fanout + << " cumulative " << cum_fanout + << " w " << w << dendl; + vector o; + auto tmpi = i; + for (auto from : w) { + ldout(cct, 10) << " from " << from << dendl; + + for (int pos = 0; pos < fanout; ++pos) { + if (type > 0) { + // non-leaf + int item = *tmpi; + do { + int r = get_immediate_parent_id(item, &item); + if (r < 0) { + ldout(cct, 10) << __func__ << " parent of " << item << " got " + << cpp_strerror(r) << dendl; + return -EINVAL; + } + } while (get_bucket_type(item) != type); + o.push_back(item); + ldout(cct, 10) << __func__ << " from " << *tmpi << " got " << item + << " of type " << type << dendl; + int n = cum_fanout; + while (n-- && tmpi != orig.end()) + ++tmpi; + } else { + // leaf + bool replaced = false; + if (overfull.count(*i)) { + for (auto item : underfull) { + ldout(cct, 10) << __func__ << " pos " << pos + << " was " << *i << " considering " << item + << dendl; + if (used.count(item)) { + ldout(cct, 20) << __func__ << " in used " << used << dendl; + continue; + } + if (!subtree_contains(from, item)) { + ldout(cct, 20) << __func__ << " not in subtree " << from << dendl; + continue; + } + if (std::find(orig.begin(), orig.end(), item) != orig.end()) { + ldout(cct, 20) << __func__ << " in orig " << orig << dendl; + continue; + } + o.push_back(item); + used.insert(item); + ldout(cct, 10) << __func__ << " pos " << pos << " replace " + << *i << " -> " << item << dendl; + replaced = true; + ++i; + break; + } + } + if (!replaced) { + ldout(cct, 10) << __func__ << " pos " << pos << " keep " << *i + << dendl; + o.push_back(*i); + ++i; + } + if (i == orig.end()) { + ldout(cct, 10) << __func__ << " end of orig, break 1" << dendl; + break; + } + } + } + if (i == orig.end()) { + ldout(cct, 10) << __func__ << " end of orig, break 2" << dendl; + break; + } + } + ldout(cct, 10) << __func__ << " w <- " << o << " was " << w << dendl; + w.swap(o); + } + *pw = w; + return 0; +} + +int CrushWrapper::try_remap_rule( + CephContext *cct, + int ruleno, + int maxout, + const set& overfull, + const vector& underfull, + const vector& orig, + vector *out) const +{ + const crush_map *map = crush; + const crush_rule *rule = get_rule(ruleno); + assert(rule); + + ldout(cct, 10) << __func__ << " ruleno " << ruleno + << " numrep " << maxout << " overfull " << overfull + << " underfull " << underfull << " orig " << orig + << dendl; + vector w; // working set + out->clear(); + + auto i = orig.begin(); + set used; + + vector> type_stack; // (type, fan-out) + + for (unsigned step = 0; step < rule->len; ++step) { + const crush_rule_step *curstep = &rule->steps[step]; + ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl; + switch (curstep->op) { + case CRUSH_RULE_TAKE: + if ((curstep->arg1 >= 0 && curstep->arg1 < map->max_devices) || + (-1-curstep->arg1 >= 0 && -1-curstep->arg1 < map->max_buckets && + map->buckets[-1-curstep->arg1])) { + w.clear(); + w.push_back(curstep->arg1); + ldout(cct, 10) << __func__ << " take " << w << dendl; + } else { + ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl; + } + break; + + case CRUSH_RULE_CHOOSELEAF_FIRSTN: + case CRUSH_RULE_CHOOSELEAF_INDEP: + { + int numrep = curstep->arg1; + int type = curstep->arg2; + if (numrep <= 0) + numrep += maxout; + type_stack.push_back(make_pair(type, numrep)); + type_stack.push_back(make_pair(0, 1)); + int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, + i, used, &w); + if (r < 0) + return r; + type_stack.clear(); + } + break; + + case CRUSH_RULE_CHOOSE_FIRSTN: + case CRUSH_RULE_CHOOSE_INDEP: + { + int numrep = curstep->arg1; + int type = curstep->arg2; + if (numrep <= 0) + numrep += maxout; + type_stack.push_back(make_pair(type, numrep)); + } + break; + + case CRUSH_RULE_EMIT: + ldout(cct, 10) << " emit " << w << dendl; + if (!type_stack.empty()) { + int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, + i, used, &w); + if (r < 0) + return r; + type_stack.clear(); + } + for (auto item : w) { + out->push_back(item); + } + w.clear(); + break; + + default: + // ignore + break; + } + } + + return 0; +} diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index ddebdafc801..49ef1b89f1f 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -1170,6 +1170,25 @@ public: out[i] = rawout[i]; } + int _choose_type_stack( + CephContext *cct, + const vector>& stack, + const set& overfull, + const vector& underfull, + const vector& orig, + vector::const_iterator& i, + set& used, + vector *pw) const; + + int try_remap_rule( + CephContext *cct, + int rule, + int maxout, + const set& overfull, + const vector& underfull, + const vector& orig, + vector *out) const; + bool check_crush_rule(int ruleset, int type, int size, ostream& ss) { assert(crush); diff --git a/src/test/crush/CrushWrapper.cc b/src/test/crush/CrushWrapper.cc index dd846e7d3fb..6e535ff6089 100644 --- a/src/test/crush/CrushWrapper.cc +++ b/src/test/crush/CrushWrapper.cc @@ -1093,6 +1093,182 @@ TEST(CrushWrapper, populate_and_cleanup_classes) { ASSERT_FALSE(c.name_exists("default~ssd")); } +TEST(CrushWrapper, try_remap_rule) { + // build a simple 2 level map + CrushWrapper c; + c.create(); + c.set_type_name(0, "osd"); + c.set_type_name(1, "host"); + c.set_type_name(2, "rack"); + c.set_type_name(3, "root"); + int bno; + int r = c.add_bucket(0, CRUSH_BUCKET_STRAW2, + CRUSH_HASH_DEFAULT, 3, 0, NULL, + NULL, &bno); + ASSERT_EQ(0, r); + ASSERT_EQ(-1, bno); + c.set_item_name(bno, "default"); + + c.set_max_devices(20); + + //JSONFormatter jf(true); + + map loc; + loc["host"] = "foo"; + loc["rack"] = "a"; + loc["root"] = "default"; + c.insert_item(g_ceph_context, 0, 1, "osd.0", loc); + c.insert_item(g_ceph_context, 1, 1, "osd.1", loc); + c.insert_item(g_ceph_context, 2, 1, "osd.2", loc); + + loc.clear(); + loc["host"] = "bar"; + loc["rack"] = "a"; + loc["root"] = "default"; + c.insert_item(g_ceph_context, 3, 1, "osd.3", loc); + c.insert_item(g_ceph_context, 4, 1, "osd.4", loc); + c.insert_item(g_ceph_context, 5, 1, "osd.5", loc); + + loc.clear(); + loc["host"] = "baz"; + loc["rack"] = "b"; + loc["root"] = "default"; + c.insert_item(g_ceph_context, 6, 1, "osd.6", loc); + c.insert_item(g_ceph_context, 7, 1, "osd.7", loc); + c.insert_item(g_ceph_context, 8, 1, "osd.8", loc); + + loc.clear(); + loc["host"] = "qux"; + loc["rack"] = "b"; + loc["root"] = "default"; + c.insert_item(g_ceph_context, 9, 1, "osd.9", loc); + c.insert_item(g_ceph_context, 10, 1, "osd.10", loc); + c.insert_item(g_ceph_context, 11, 1, "osd.11", loc); + c.finalize(); + + loc.clear(); + loc["host"] = "bif"; + loc["rack"] = "c"; + loc["root"] = "default"; + c.insert_item(g_ceph_context, 12, 1, "osd.12", loc); + c.insert_item(g_ceph_context, 13, 1, "osd.13", loc); + c.insert_item(g_ceph_context, 14, 1, "osd.14", loc); + c.finalize(); + + loc.clear(); + loc["host"] = "pop"; + loc["rack"] = "c"; + loc["root"] = "default"; + c.insert_item(g_ceph_context, 15, 1, "osd.15", loc); + c.insert_item(g_ceph_context, 16, 1, "osd.16", loc); + c.insert_item(g_ceph_context, 17, 1, "osd.17", loc); + c.finalize(); + + //c.dump(&jf); + //jf.flush(cout); + + // take + emit + { + } + + // take + choose device + emit + { + cout << "take + choose + emit" << std::endl; + ostringstream err; + int rule = c.add_simple_ruleset("one", "default", "osd", "firstn", 0, &err); + ASSERT_EQ(rule, 0); + + vector orig = { 0, 3, 9 }; + set overfull = { 3 }; + vector underfull = { 0, 2, 5, 8, 11 }; + vector out; + int r = c.try_remap_rule(g_ceph_context, rule, 3, + overfull, underfull, + orig, &out); + cout << orig << " -> r = " << (int)r << " out " << out << std::endl; + ASSERT_EQ(r, 0); + ASSERT_EQ(3u, out.size()); + ASSERT_EQ(0, out[0]); + ASSERT_EQ(2, out[1]); + ASSERT_EQ(9, out[2]); + + // make sure we cope with dups between underfull and future values in orig + underfull = {9, 0, 2, 5}; + orig = {1, 3, 9}; + + r = c.try_remap_rule(g_ceph_context, rule, 3, + overfull, underfull, + orig, &out); + cout << orig << " -> r = " << (int)r << " out " << out << std::endl; + ASSERT_EQ(r, 0); + ASSERT_EQ(3u, out.size()); + ASSERT_EQ(1, out[0]); + ASSERT_EQ(0, out[1]); + ASSERT_EQ(9, out[2]); + } + + // chooseleaf + { + cout << "take + chooseleaf + emit" << std::endl; + ostringstream err; + int rule = c.add_simple_ruleset("two", "default", "host", "firstn", 0, &err); + ASSERT_EQ(rule, 1); + + vector orig = { 0, 3, 9 }; + set overfull = { 3 }; + vector underfull = { 0, 2, 5, 8, 11 }; + vector out; + int r = c.try_remap_rule(g_ceph_context, rule, 3, + overfull, underfull, + orig, &out); + cout << orig << " -> r = " << (int)r << " out " << out << std::endl; + ASSERT_EQ(r, 0); + ASSERT_EQ(3u, out.size()); + ASSERT_EQ(0, out[0]); + ASSERT_EQ(5, out[1]); + ASSERT_EQ(9, out[2]); + } + + // choose + choose + { + cout << "take + choose + choose + choose + emit" << std::endl; + int rule = c.add_rule(5, 2, 0, 1, 10, 2); + ASSERT_EQ(2, rule); + c.set_rule_step_take(rule, 0, bno); + c.set_rule_step_choose_indep(rule, 1, 2, 2); + c.set_rule_step_choose_indep(rule, 2, 2, 1); + c.set_rule_step_choose_indep(rule, 3, 1, 0); + c.set_rule_step_emit(rule, 4); + + vector orig = { 0, 3, 16, 12 }; + set overfull = { 3, 12 }; + vector underfull = { 6, 7, 9, 3, 0, 1, 15, 16, 13, 2, 5, 8, 11 }; + vector out; + int r = c.try_remap_rule(g_ceph_context, rule, 3, + overfull, underfull, + orig, &out); + cout << orig << " -> r = " << (int)r << " out " << out << std::endl; + ASSERT_EQ(r, 0); + ASSERT_EQ(4u, out.size()); + ASSERT_EQ(0, out[0]); + ASSERT_EQ(5, out[1]); + ASSERT_EQ(16, out[2]); + ASSERT_EQ(13, out[3]); + + orig.pop_back(); + out.clear(); + r = c.try_remap_rule(g_ceph_context, rule, 3, + overfull, underfull, + orig, &out); + cout << orig << " -> r = " << (int)r << " out " << out << std::endl; + ASSERT_EQ(r, 0); + ASSERT_EQ(3u, out.size()); + ASSERT_EQ(0, out[0]); + ASSERT_EQ(5, out[1]); + ASSERT_EQ(16, out[2]); + } +} + int main(int argc, char **argv) { vector args; argv_to_vec(argc, (const char **)argv, args);