From 49c14acbf2b75df73e6168428fb9fe708698d600 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 17 Apr 2008 11:31:53 -0700 Subject: [PATCH] crushtool basically working --- src/crush/CrushWrapper.h | 3 + src/crush/builder.c | 13 +- src/crush/crush.h | 9 + src/crush/grammar.h | 25 ++- src/crush/sample.txt | 20 +- src/crushtool.cc | 414 +++++++++++++++++++++++++++++++++------ 6 files changed, 410 insertions(+), 74 deletions(-) diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index da3c05d8bd7f5..6c9011c07157f 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -296,6 +296,9 @@ public: crush_finalize(crush); } + void set_max_devices(int m) { + crush->max_devices = m; + } void set_offload(int i, unsigned o) { assert(i < crush->max_devices); crush->device_offload[i] = o; diff --git a/src/crush/builder.c b/src/crush/builder.c index 4deac66e760d3..5f6985570e92b 100644 --- a/src/crush/builder.c +++ b/src/crush/builder.c @@ -209,11 +209,16 @@ crush_make_list_bucket(int type, int size, bucket->item_weights = malloc(sizeof(__u32)*size); bucket->sum_weights = malloc(sizeof(__u32)*size); w = 0; - for (i=size-1; i>=0; i--) { - bucket->h.items[i] = items[i]; - bucket->item_weights[i] = weights[i]; + /* + * caller will place new items at end. so, we reverse things, + * since we put new items at the beginning. + */ + for (i=0; ih.items[pos] = items[i]; + bucket->item_weights[pos] = weights[i]; w += weights[i]; - bucket->sum_weights[i] = w; + bucket->sum_weights[pos] = w; /*printf("%d item %d weight %d sum %d\n", i, items[i], weights[i], bucket->sum_weights[i]);*/ } diff --git a/src/crush/crush.h b/src/crush/crush.h index 193e86b93934f..4259a9bec4b32 100644 --- a/src/crush/crush.h +++ b/src/crush/crush.h @@ -68,6 +68,15 @@ enum { CRUSH_BUCKET_TREE = 3, CRUSH_BUCKET_STRAW = 4 }; +static inline const char *crush_bucket_alg_name(int alg) { + switch (alg) { + case CRUSH_BUCKET_UNIFORM: return "uniform"; + case CRUSH_BUCKET_LIST: return "list"; + case CRUSH_BUCKET_TREE: return "tree"; + case CRUSH_BUCKET_STRAW: return "straw"; + default: return "unknown"; + } +} struct crush_bucket { __s32 id; /* this'll be negative */ diff --git a/src/crush/grammar.h b/src/crush/grammar.h index 9aafa6f9f1627..a4c17d0752805 100644 --- a/src/crush/grammar.h +++ b/src/crush/grammar.h @@ -37,8 +37,7 @@ struct crush_grammar : public grammar static const int _bucket = 17; static const int _step_take = 18; - static const int _step_choose_firstn = 19; - static const int _step_choose_indep = 20; + static const int _step_choose = 19; static const int _step_emit = 21; static const int _step = 22; static const int _crushrule = 23; @@ -63,8 +62,7 @@ struct crush_grammar : public grammar rule, parser_tag<_bucket> > bucket; rule, parser_tag<_step_take> > step_take; - rule, parser_tag<_step_choose_firstn> > step_choose_firstn; - rule, parser_tag<_step_choose_indep> > step_choose_indep; + rule, parser_tag<_step_choose> > step_choose; rule, parser_tag<_step_emit> > step_emit; rule, parser_tag<_step> > step; rule, parser_tag<_crushrule> > crushrule; @@ -82,14 +80,20 @@ struct crush_grammar : public grammar name = leaf_node_d[ lexeme_d[ +alnum_p ] ]; // devices - device = str_p("device") >> posint >> name >> !( str_p("offload") >> real_p ); + device = str_p("device") >> posint >> name + >> !( ( str_p("offload") >> real_p ) | + ( str_p("load") >> real_p ) | + str_p("down")); // bucket types bucket_type = str_p("type") >> posint >> name; // buckets bucket_id = str_p("id") >> negint; - bucket_alg = str_p("alg") >> ( str_p("uniform") | str_p("list") | str_p("tree") | str_p("straw") ); + bucket_alg = str_p("alg") >> ( str_p("uniform") | + str_p("list") | + str_p("tree") | + str_p("straw") ); bucket_item = str_p("item") >> name >> !( str_p("weight") >> real_p ) >> !( str_p("pos") >> posint ); @@ -97,10 +101,13 @@ struct crush_grammar : public grammar // rules step_take = str_p("take") >> str_p("root"); - step_choose_indep = str_p("choose_indep") >> integer >> name; - step_choose_firstn = str_p("choose_firstn") >> integer >> name; + step_choose = str_p("choose") >> ( str_p("indep") | str_p("firstn") ) + >> integer + >> str_p("type") >> name; step_emit = str_p("emit"); - step = str_p("step") >> ( step_take | step_choose_indep | step_choose_firstn | step_emit ); + step = str_p("step") >> ( step_take | + step_choose | + step_emit ); crushrule = str_p("rule") >> !name >> '{' >> str_p("pool") >> posint >> str_p("type") >> ( str_p("replicated") | str_p("raid4") ) diff --git a/src/crush/sample.txt b/src/crush/sample.txt index dcbe7dc84572c..aef39626d450e 100644 --- a/src/crush/sample.txt +++ b/src/crush/sample.txt @@ -2,7 +2,7 @@ # devices device 1 osd001 device 2 osd002 -device 3 osd003 +device 3 osd003 down # same as offload 1.0 device 4 osd004 offload 0 # 0.0 -> normal, 1.0 -> failed device 5 osd005 offload 0.1 device 6 osd006 offload 0.1 @@ -13,12 +13,12 @@ type 2 cab type 3 row type 10 pool -cab cabd2 { +cab root { id -1 # optional - alg straw # required + alg tree # required item osd001 item osd002 weight 600 pos 1 - item osd003 weight 600 pos 2 + item osd003 weight 600 pos 0 item osd004 weight 600 pos 3 item osd005 weight 600 pos 4 } @@ -32,6 +32,16 @@ rule normal { max_size 4 # need 1 or more of these. step take root - step choose_indep 0 osd + step choose firstn 0 type osd + step emit +} + +rule { + pool 1 + type raid4 + min_size 3 + max_size 6 + step take root + step choose indep 0 type osd step emit } diff --git a/src/crushtool.cc b/src/crushtool.cc index 1539ddfe5b2df..f62f7b33d4674 100644 --- a/src/crushtool.cc +++ b/src/crushtool.cc @@ -40,71 +40,342 @@ using namespace std; typedef char const* iterator_t; typedef tree_match parse_tree_match_t; typedef parse_tree_match_t::tree_iterator iter_t; +typedef parse_tree_match_t::node_t node_t; +int verbose = 0; + +map item_id; +map id_item; +map item_weight; + +map device_offload; // may or may not be set. + +map type_id; + +map rule_id; + +string string_node(node_t &node) +{ + return string(node.value.begin(), node.value.end()); +} + +int int_node(node_t &node) +{ + string str = string_node(node); + return strtol(str.c_str(), 0, 10); +} -void parse_device(iter_t const& i, CrushWrapper &crush) { +float float_node(node_t &node) +{ + string s = string_node(node); + return strtof(s.c_str(), 0); +} + +void parse_device(iter_t const& i, CrushWrapper &crush) +{ int size = i->children.size(); - string id_str = string(i->children[1].value.begin(), i->children[1].value.end()); - istringstream idStream(id_str);; - int id; - if (idStream>>id) { - cout << "id: " << id << std::endl; + int id = int_node(i->children[1]); + + string name = string_node(i->children[2]); + crush.set_item_name(id, name.c_str()); + if (item_id.count(name)) { + cerr << "item " << name << " defined twice" << std::endl; + exit(1); + } + item_id[name] = id; + id_item[id] = name; + + if (verbose) cout << "device " << id << " " << name; + + if (size >= 4) { + string tag = string_node(i->children[3]); + float offload; + if (tag == "offload") + offload = float_node(i->children[4]); + else if (tag == "load") + offload = 1.0 - float_node(i->children[4]); + else if (tag == "down") + offload = 1.0; + else + assert(0); + + if (verbose) cout << " offload " << offload; + if (offload < 0 || offload > 1.0) { + cerr << "illegal device offload " << offload << " on device " << id << " " << name + << " (valid range is [0,1])" << std::endl; + exit(1); + } + device_offload[id] = (unsigned)(offload * 0x10000); + } + if (verbose) cout << std::endl; + + if (id >= crush.get_max_devices()) + crush.set_max_devices(id+1); +} + +void parse_bucket_type(iter_t const& i, CrushWrapper &crush) +{ + int id = int_node(i->children[1]); + string name = string_node(i->children[2]); + if (verbose) cout << "type " << id << " " << name << std::endl; + type_id[name] = id; + crush.set_type_name(id, name.c_str()); +} + +void parse_bucket(iter_t const& i, CrushWrapper &crush) +{ + string tname = string_node(i->children[0]); + if (!type_id.count(tname)) { + cerr << "bucket type '" << tname << "' is not defined" << std::endl; + exit(1); } + int type = type_id[tname]; - string name = string(i->children[2].value.begin(), i->children[2].value.end()); - cout << "name: " << name << std::endl; - - // crush.crush->device_offload[1000] = 0; - - float offload = 0; - unsigned offload_fixed = 0; - bool have_offload = false; - if (size == 5) { - have_offload = true; - string offload_str = string(i->children[4].value.begin(), i->children[4].value.end()); - istringstream offloadStream(offload_str); - if (offloadStream>>offload) { - offload_fixed = static_cast( offload * 65536 ); - // mem not allocated for: crush.set_offload(id, offload_fixed) until crush.finalize() - + string name = string_node(i->children[1]); + if (item_id.count(name)) { + cerr << "bucket or device '" << name << "' is already defined" << std::endl; + exit(1); + } + + int id = 0; // none, yet! + int alg = -1; + set used_items; + int size = 0; + + for (unsigned p=3; pchildren.size()-1; p++) { + iter_t sub = i->children.begin() + p; + string tag = string_node(sub->children[0]); + //cout << "tag " << tag << std::endl; + if (tag == "id") + id = int_node(sub->children[1]); + else if (tag == "alg") { + string a = string_node(sub->children[1]); + if (a == "uniform") + alg = CRUSH_BUCKET_UNIFORM; + else if (a == "list") + alg = CRUSH_BUCKET_LIST; + else if (a == "tree") + alg = CRUSH_BUCKET_TREE; + else if (a == "straw") + alg = CRUSH_BUCKET_STRAW; + else { + cerr << "unknown bucket alg '" << a << "'" << std::endl; + exit(1); + } } + else if (tag == "item") { + // first, just determine which item pos's are already used + size++; + for (unsigned q = 2; q < sub->children.size(); q++) { + string tag = string_node(sub->children[q++]); + if (tag == "pos") { + int pos = int_node(sub->children[q]); + used_items.insert(pos); + } + } + } + else assert(0); } - crush.set_item_name(id, name.c_str()); + // now do the items. + if (!used_items.empty()) + size = MAX(size, *used_items.rbegin()); + vector items(size); + vector weights(size); + + int curpos = 0; + float bucketweight = 0; + for (unsigned p=3; pchildren.size()-1; p++) { + iter_t sub = i->children.begin() + p; + string tag = string_node(sub->children[0]); + if (tag == "item") { + + string iname = string_node(sub->children[1]); + if (!item_id.count(iname)) { + cerr << "item '" << iname << "' in bucket '" << name << "' is not defined" << std::endl; + exit(1); + } + int itemid = item_id[iname]; + + float weight = 1.0; + if (item_weight.count(itemid)) + weight = item_weight[itemid]; + + int pos = -1; + for (unsigned q = 2; q < sub->children.size(); q++) { + string tag = string_node(sub->children[q++]); + if (tag == "weight") + weight = float_node(sub->children[q]); + else if (tag == "pos") + pos = int_node(sub->children[q]); + else + assert(0); + } + if (pos < 0) { + while (used_items.count(curpos)) curpos++; + pos = curpos++; + } + //cout << " item " << iname << " (" << itemid << ") pos " << pos << " weight " << weight << std::endl; + items[pos] = itemid; + weights[pos] = (unsigned)(weight * 0x10000); + bucketweight += weight; + } + } + + if (id == 0) { + for (id=-1; id_item.count(id); id--) ; + //cout << "assigned id " << id << std::endl; + } + if (verbose) cout << "bucket " << name << " (" << id << ") " << size << " items and weight " << bucketweight << std::endl; + id_item[id] = name; + item_id[name] = id; + item_weight[id] = bucketweight; + + crush.add_bucket(id, alg, type, size, &items[0], &weights[0]); + crush.set_item_name(id, name.c_str()); } +void parse_rule(iter_t const& i, CrushWrapper &crush) +{ + int start; // rule name is optional! + + string rname = string_node(i->children[1]); + if (rname != "{") { + if (rule_id.count(rname)) { + cerr << "rule name '" << rname << "' already defined\n" << std::endl; + exit(1); + } + start = 4; + } else { + rname = string(); + start = 3; + } -void walk_crush_config(iter_t const& i, CrushWrapper &crush, int ind=1) -{ + int pool = int_node(i->children[start]); + + string tname = string_node(i->children[start+2]); + int type; + if (tname == "replicated") + type = CEPH_PG_TYPE_REP; + else if (tname == "raid4") + type = CEPH_PG_TYPE_RAID4; + else + assert(0); + + int minsize = int_node(i->children[start+4]); + int maxsize = int_node(i->children[start+6]); + + int steps = i->children.size() - start - 8; + //cout << "num steps " << steps << std::endl; + + int ruleno = crush.add_rule(steps, pool, type, minsize, maxsize, -1); + if (rname.length()) { + crush.set_rule_name(ruleno, rname.c_str()); + rule_id[rname] = ruleno; + } + + int step = 0; + for (iter_t p = i->children.begin() + start + 7; step < steps; p++) { + iter_t s = p->children.begin() + 1; + int stepid = s->value.id().to_long(); + switch (stepid) { + case crush_grammar::_step_take: + { + string item = string_node(s->children[1]); + if (!item_id.count(item)) { + cerr << "in rule '" << rname << "' item '" << item << "' not defined" << std::endl; + exit(1); + } + crush.set_rule_step_take(ruleno, step++, item_id[item]); + } + break; + + case crush_grammar::_step_choose: + { + string type = string_node(s->children[4]); + if (!type_id.count(type)) { + cerr << "in rule '" << rname << "' type '" << type << "' not defined" << std::endl; + exit(1); + } + string mode = string_node(s->children[1]); + if (mode == "firstn") + crush.set_rule_step_choose_firstn(ruleno, step++, int_node(s->children[2]), type_id[type]); + else if (mode == "indep") + crush.set_rule_step_choose_indep(ruleno, step++, int_node(s->children[2]), type_id[type]); + else + assert(0); + } + break; + + case crush_grammar::_step_emit: + crush.set_rule_step_emit(ruleno, step++); + break; + + default: + cerr << "bad crush step " << stepid << std::endl; + assert(0); + } + } + assert(step == steps); +} + +void dump(iter_t const& i, int ind=1) +{ cout << "dump"; for (int j=0; jvalue.id().to_long(); cout << id << "\t"; cout << "'" << string(i->value.begin(), i->value.end()) << "' " << i->children.size() << " children" << std::endl; + for (unsigned int j = 0; j < i->children.size(); j++) + dump(i->children.begin() + j, ind+1); +} - switch (i->value.id().to_long()) { - case crush_grammar::_device: - cout << "device" << std::endl; - parse_device(i, crush); - break; - case crush_grammar::_bucket_type: - cout << "type" << std::endl; - //parse_bucket_type(i->children.begin(), crush); - break; - case crush_grammar::_bucket: - cout << "bucket" << std::endl; - //parse_bucket(i->children.begin(), crush); - break; - case crush_grammar::_crushrule: - cout << "rule" << std::endl; - //parse_rule(i->children.begin(), crush); - break; - default: - for (unsigned int j = 0; j < i->children.size(); j++) - walk_crush_config(i->children.begin() + j, crush, ind+1); +void find_used_bucket_ids(iter_t const& i) +{ + for (iter_t p = i->children.begin(); p != i->children.end(); p++) { + if ((int)p->value.id().to_long() == crush_grammar::_bucket) { + iter_t firstline = p->children.begin() + 3; + string tag = string_node(firstline->children[0]); + if (tag == "id") { + int id = int_node(firstline->children[1]); + //cout << "saw bucket id " << id << std::endl; + id_item[id] = string(); + } + } + } +} + +void parse_crush(iter_t const& i, CrushWrapper &crush) +{ + find_used_bucket_ids(i); + + for (iter_t p = i->children.begin(); p != i->children.end(); p++) { + switch (p->value.id().to_long()) { + case crush_grammar::_device: + parse_device(p, crush); + break; + case crush_grammar::_bucket_type: + parse_bucket_type(p, crush); + break; + case crush_grammar::_bucket: + parse_bucket(p, crush); + break; + case crush_grammar::_crushrule: + parse_rule(p, crush); + break; + default: + assert(0); + } } + + //cout << "max_devices " << crush.get_max_devices() << std::endl; + crush.finalize(); + for (int i=0; i= 0) str.erase(n, str.length()-n); - if (verbose) cout << line << ": " << str << std::endl; + if (verbose>1) cout << line << ": " << str << std::endl; if (big.length()) big += " "; line_pos[big.length()] = line; @@ -149,7 +419,7 @@ int compile_crush_file(const char *infn, CrushWrapper &crush) big += str; } - if (verbose >= 2) cout << "whole file is: \"" << big << "\"" << std::endl; + if (verbose > 2) cout << "whole file is: \"" << big << "\"" << std::endl; crush_grammar crushg; const char *start = big.c_str(); @@ -171,8 +441,9 @@ int compile_crush_file(const char *infn, CrushWrapper &crush) return 1; } - cout << "parsing succeeded\n"; - walk_crush_config(info.trees.begin(), crush); + //cout << "parsing succeeded\n"; + //dump(info.trees.begin()); + parse_crush(info.trees.begin(), crush); return 0; } @@ -210,7 +481,9 @@ void print_rule_name(ostream& out, int t, CrushWrapper &crush) void print_fixedpoint(ostream& out, int i) { - out << (i / 0x10000); + char s[20]; + sprintf(s, "%.3f", (float)i / (float)0x10000); + out << s; } int decompile_crush(CrushWrapper &crush, ostream &out) @@ -219,6 +492,8 @@ int decompile_crush(CrushWrapper &crush, ostream &out) out << "# devices\n"; for (int i=0; i