#include "mapper.h"
#include "builder.h"
+#include "include/err.h"
#include "include/encodable.h"
#include <stdlib.h>
#include <map>
#include <set>
+#include <string>
class CrushWrapper {
public:
- struct crush_map *map;
+ struct crush_map *crush;
+ map<int, string> type_map; /* bucket type names */
+ map<int, string> name_map; /* bucket/device names */
+
+ /* reverse maps */
+ map<string, int> type_rmap, name_rmap;
+
+private:
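+ /* invert the id->name maps above into the name->id reverse maps */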
+ void build_rmaps() {
+ build_rmap(type_map, type_rmap);
+ build_rmap(name_map, name_rmap);
+ }
+ void build_rmap(map<int, string> &f, map<string, int> &r) {
+ r.clear();
+ for (map<int, string>::iterator p = f.begin(); p != f.end(); p++)
+ r[p->second] = p->first;
+ }
- CrushWrapper() : map(0) {}
+public:
+ CrushWrapper() : crush(0) {}
~CrushWrapper() {
- if (map) crush_destroy(map);
+ if (crush) crush_destroy(crush);
}
+ /* building */
void create() {
- if (map) crush_destroy(map);
- map = crush_create();
+ if (crush) crush_destroy(crush);
+ crush = crush_create();
+ }
+
+ /*** types and names ***/
+ int get_type_id(const char *s) {
+ string name(s);
+ if (type_rmap.count(name))
+ return type_rmap[name];
+ return 0;
+ }
+ const char *get_type_name(int t) {
+ if (type_map.count(t))
+ return type_map[t].c_str();
+ return 0;
+ }
+ int get_name_id(const char *s) {
+ string name(s);
+ if (name_rmap.count(name))
+ return name_rmap[name];
+ return 0;
+ }
+
+ /*** rules ***/
+private:
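+ /* internal lookups follow the include/err.h convention: a missing map is
+ reported as an error-encoded pointer (tested with IS_ERR(), decoded with
+ PTR_ERR()), while an out-of-range ruleno simply yields NULL */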
+ crush_rule *get_rule(unsigned ruleno) {
+ if (!crush) return (crush_rule *)(-ENOENT);
+ if (ruleno >= crush->max_rules)
+ return 0;
+ return crush->rules[ruleno];
+ }
+ crush_rule_step *get_rule_step(unsigned ruleno, unsigned step) {
+ crush_rule *n = get_rule(ruleno);
+ if (IS_ERR(n)) return (crush_rule_step *)n; /* propagate -ENOENT from get_rule() */
+ if (!n) return (crush_rule_step *)(-EINVAL);
+ if (step >= n->len) return (crush_rule_step *)(-EINVAL);
+ return &n->steps[step];
+ }
+
+public:
+ /* accessors */
+ int get_max_rules() {
+ if (!crush) return 0;
+ return crush->max_rules;
+ }
+ int get_rule_op(unsigned ruleno, unsigned step) {
+ crush_rule_step *s = get_rule_step(ruleno, step);
+ if (IS_ERR(s)) return PTR_ERR(s);
+ return s->op;
+ }
+ int get_rule_arg1(unsigned ruleno, unsigned step) {
+ crush_rule_step *s = get_rule_step(ruleno, step);
+ if (IS_ERR(s)) return PTR_ERR(s);
+ return s->arg1;
+ }
+ int get_rule_arg2(unsigned ruleno, unsigned step) {
+ crush_rule_step *s = get_rule_step(ruleno, step);
+ if (IS_ERR(s)) return PTR_ERR(s);
+ return s->arg2;
+ }
+
+ /* modifiers */
+ int add_rule(unsigned ruleno, int len) {
+ if (!crush) return -ENOENT;
+ crush_rule *n = crush_make_rule(len);
+ crush_add_rule(crush, ruleno, n);
+ return 0;
+ }
+ int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) {
+ if (!crush) return -ENOENT;
+ crush_rule *n = get_rule(ruleno);
+ if (!n) return -1;
+ crush_rule_set_step(n, step, op, arg1, arg2);
+ return 0;
}
+ int set_rule_step_take(unsigned ruleno, unsigned step, int val) {
+ return set_rule_step(ruleno, step, CRUSH_RULE_TAKE, val, 0);
+ }
+ int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) {
+ return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type);
+ }
+ int set_rule_step_choose_indep(unsigned ruleno, unsigned step, int val, int type) {
+ return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_INDEP, val, type);
+ }
+ int set_rule_step_emit(unsigned ruleno, unsigned step) {
+ return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0);
+ }
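+ /* sketch of how the helpers above compose (ruleno, rootid and n are
+ hypothetical values): allocate a 3-step rule, then fill its steps with
+ take -> choose -> emit, mirroring the raw crush_rule_set_step() calls
+ in the map builder below:
+
+ add_rule(ruleno, 3);
+ set_rule_step_take(ruleno, 0, rootid);
+ set_rule_step_choose_firstn(ruleno, 1, n, 0);
+ set_rule_step_emit(ruleno, 2);
+ */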
+
void finalize() {
- assert(map);
- crush_finalize(map);
+ assert(crush);
+ crush_finalize(crush);
}
+
void set_offload(int i, unsigned o) {
- assert(i < map->max_devices);
- map->device_offload[i] = o;
+ assert(i < crush->max_devices);
+ crush->device_offload[i] = o;
}
unsigned get_offload(int i) {
- assert(i < map->max_devices);
- return map->device_offload[i];
+ assert(i < crush->max_devices);
+ return crush->device_offload[i];
}
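+ /* map input x through the given rule; up to maxout results are copied into out */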
void do_rule(int rule, int x, vector<int>& out, int maxout, int forcefeed) {
int rawout[maxout];
- int numrep = crush_do_rule(map, rule, x, rawout, maxout, forcefeed);
+ int numrep = crush_do_rule(crush, rule, x, rawout, maxout, forcefeed);
out.resize(numrep);
for (int i=0; i<numrep; i++)
out[i] = rawout[i];
}
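+ /* encoding: the three max_* dimensions, the raw device_offload array, then
+ each bucket (tagged by bucket_type, 0 = absent) and each rule (tagged by a
+ present/absent flag) */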
void _encode(bufferlist &bl) {
- ::_encode_simple(map->max_buckets, bl);
- ::_encode_simple(map->max_rules, bl);
- ::_encode_simple(map->max_devices, bl);
+ ::_encode_simple(crush->max_buckets, bl);
+ ::_encode_simple(crush->max_rules, bl);
+ ::_encode_simple(crush->max_devices, bl);
// simple arrays
- bl.append((char*)map->device_offload, sizeof(map->device_offload[0]) * map->max_devices);
+ bl.append((char*)crush->device_offload, sizeof(crush->device_offload[0]) * crush->max_devices);
// buckets
- for (unsigned i=0; i<map->max_buckets; i++) {
+ for (unsigned i=0; i<crush->max_buckets; i++) {
__u32 type = 0;
- if (map->buckets[i]) type = map->buckets[i]->bucket_type;
+ if (crush->buckets[i]) type = crush->buckets[i]->bucket_type;
::_encode_simple(type, bl);
if (!type) continue;
- ::_encode_simple(map->buckets[i]->id, bl);
- ::_encode_simple(map->buckets[i]->type, bl);
- ::_encode_simple(map->buckets[i]->bucket_type, bl);
- ::_encode_simple(map->buckets[i]->weight, bl);
- ::_encode_simple(map->buckets[i]->size, bl);
- for (unsigned j=0; j<map->buckets[i]->size; j++)
- ::_encode_simple(map->buckets[i]->items[j], bl);
+ ::_encode_simple(crush->buckets[i]->id, bl);
+ ::_encode_simple(crush->buckets[i]->type, bl);
+ ::_encode_simple(crush->buckets[i]->bucket_type, bl);
+ ::_encode_simple(crush->buckets[i]->weight, bl);
+ ::_encode_simple(crush->buckets[i]->size, bl);
+ for (unsigned j=0; j<crush->buckets[i]->size; j++)
+ ::_encode_simple(crush->buckets[i]->items[j], bl);
- switch (map->buckets[i]->type) {
+ switch (crush->buckets[i]->type) {
case CRUSH_BUCKET_UNIFORM:
- for (unsigned j=0; j<map->buckets[i]->size; j++)
- ::_encode_simple(((crush_bucket_uniform*)map->buckets[i])->primes[j], bl);
- ::_encode_simple(((crush_bucket_uniform*)map->buckets[i])->item_weight, bl);
+ for (unsigned j=0; j<crush->buckets[i]->size; j++)
+ ::_encode_simple(((crush_bucket_uniform*)crush->buckets[i])->primes[j], bl);
+ ::_encode_simple(((crush_bucket_uniform*)crush->buckets[i])->item_weight, bl);
break;
case CRUSH_BUCKET_LIST:
- for (unsigned j=0; j<map->buckets[i]->size; j++) {
- ::_encode_simple(((crush_bucket_list*)map->buckets[i])->item_weights[j], bl);
- ::_encode_simple(((crush_bucket_list*)map->buckets[i])->sum_weights[j], bl);
+ for (unsigned j=0; j<crush->buckets[i]->size; j++) {
+ ::_encode_simple(((crush_bucket_list*)crush->buckets[i])->item_weights[j], bl);
+ ::_encode_simple(((crush_bucket_list*)crush->buckets[i])->sum_weights[j], bl);
}
break;
case CRUSH_BUCKET_TREE:
- for (unsigned j=0; j<map->buckets[i]->size; j++)
- ::_encode_simple(((crush_bucket_tree*)map->buckets[i])->node_weights[j], bl);
+ for (unsigned j=0; j<crush->buckets[i]->size; j++)
+ ::_encode_simple(((crush_bucket_tree*)crush->buckets[i])->node_weights[j], bl);
break;
case CRUSH_BUCKET_STRAW:
- for (unsigned j=0; j<map->buckets[i]->size; j++)
- ::_encode_simple(((crush_bucket_straw*)map->buckets[i])->straws[j], bl);
+ for (unsigned j=0; j<crush->buckets[i]->size; j++)
+ ::_encode_simple(((crush_bucket_straw*)crush->buckets[i])->straws[j], bl);
break;
}
}
// rules
- for (unsigned i=0; i<map->max_rules; i++) {
- __u32 yes = map->rules[i] ? 1:0;
+ for (unsigned i=0; i<crush->max_rules; i++) {
+ __u32 yes = crush->rules[i] ? 1:0;
::_encode_simple(yes, bl);
if (!yes) continue;
- ::_encode_simple(map->rules[i]->len, bl);
- for (unsigned j=0; j<map->rules[i]->len; j++)
- ::_encode_simple(map->rules[i]->steps[j], bl);
+ ::_encode_simple(crush->rules[i]->len, bl);
+ for (unsigned j=0; j<crush->rules[i]->len; j++)
+ ::_encode_simple(crush->rules[i]->steps[j], bl);
}
}
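+ /* decoding mirrors _encode(): create() a fresh map, rebuild the offload
+ array, buckets, and rules, then finalize() */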
void _decode(bufferlist::iterator &blp) {
create();
- ::_decode_simple(map->max_buckets, blp);
- ::_decode_simple(map->max_rules, blp);
- ::_decode_simple(map->max_devices, blp);
+ ::_decode_simple(crush->max_buckets, blp);
+ ::_decode_simple(crush->max_rules, blp);
+ ::_decode_simple(crush->max_devices, blp);
- map->device_offload = (__u32*)malloc(sizeof(map->device_offload[0])*map->max_devices);
- blp.copy(sizeof(map->device_offload[0])*map->max_devices, (char*)map->device_offload);
+ crush->device_offload = (__u32*)malloc(sizeof(crush->device_offload[0])*crush->max_devices);
+ blp.copy(sizeof(crush->device_offload[0])*crush->max_devices, (char*)crush->device_offload);
// buckets
- map->buckets = (crush_bucket**)malloc(sizeof(crush_bucket*)*map->max_buckets);
- for (unsigned i=0; i<map->max_buckets; i++) {
+ crush->buckets = (crush_bucket**)malloc(sizeof(crush_bucket*)*crush->max_buckets);
+ for (unsigned i=0; i<crush->max_buckets; i++) {
__u32 type;
::_decode_simple(type, blp);
if (!type) {
- map->buckets[i] = 0;
+ crush->buckets[i] = 0;
continue;
}
int size = 0;
switch (type) {
case CRUSH_BUCKET_UNIFORM:
size = sizeof(crush_bucket_uniform);
break;
case CRUSH_BUCKET_LIST:
size = sizeof(crush_bucket_list);
break;
case CRUSH_BUCKET_TREE:
size = sizeof(crush_bucket_tree);
break;
case CRUSH_BUCKET_STRAW:
size = sizeof(crush_bucket_straw);
break;
default:
assert(0);
}
- map->buckets[i] = (crush_bucket*)malloc(size);
- memset(map->buckets[i], 0, size);
+ crush->buckets[i] = (crush_bucket*)malloc(size);
+ memset(crush->buckets[i], 0, size);
- ::_decode_simple(map->buckets[i]->id, blp);
- ::_decode_simple(map->buckets[i]->type, blp);
- ::_decode_simple(map->buckets[i]->bucket_type, blp);
- ::_decode_simple(map->buckets[i]->weight, blp);
- ::_decode_simple(map->buckets[i]->size, blp);
+ ::_decode_simple(crush->buckets[i]->id, blp);
+ ::_decode_simple(crush->buckets[i]->type, blp);
+ ::_decode_simple(crush->buckets[i]->bucket_type, blp);
+ ::_decode_simple(crush->buckets[i]->weight, blp);
+ ::_decode_simple(crush->buckets[i]->size, blp);
- map->buckets[i]->items = (__s32*)malloc(sizeof(__s32)*map->buckets[i]->size);
- for (unsigned j=0; j<map->buckets[i]->size; j++)
- ::_decode_simple(map->buckets[i]->items[j], blp);
+ crush->buckets[i]->items = (__s32*)malloc(sizeof(__s32)*crush->buckets[i]->size);
+ for (unsigned j=0; j<crush->buckets[i]->size; j++)
+ ::_decode_simple(crush->buckets[i]->items[j], blp);
- switch (map->buckets[i]->type) {
+ switch (crush->buckets[i]->type) {
case CRUSH_BUCKET_UNIFORM:
- ((crush_bucket_uniform*)map->buckets[i])->primes =
- (__u32*)malloc(map->buckets[i]->size * sizeof(__u32));
- for (unsigned j=0; j<map->buckets[i]->size; j++)
- ::_decode_simple(((crush_bucket_uniform*)map->buckets[i])->primes[j], blp);
- ::_decode_simple(((crush_bucket_uniform*)map->buckets[i])->item_weight, blp);
+ ((crush_bucket_uniform*)crush->buckets[i])->primes =
+ (__u32*)malloc(crush->buckets[i]->size * sizeof(__u32));
+ for (unsigned j=0; j<crush->buckets[i]->size; j++)
+ ::_decode_simple(((crush_bucket_uniform*)crush->buckets[i])->primes[j], blp);
+ ::_decode_simple(((crush_bucket_uniform*)crush->buckets[i])->item_weight, blp);
break;
case CRUSH_BUCKET_LIST:
- ((crush_bucket_list*)map->buckets[i])->item_weights =
- (__u32*)malloc(map->buckets[i]->size * sizeof(__u32));
- ((crush_bucket_list*)map->buckets[i])->sum_weights =
- (__u32*)malloc(map->buckets[i]->size * sizeof(__u32));
-
- for (unsigned j=0; j<map->buckets[i]->size; j++) {
- ::_decode_simple(((crush_bucket_list*)map->buckets[i])->item_weights[j], blp);
- ::_decode_simple(((crush_bucket_list*)map->buckets[i])->sum_weights[j], blp);
+ ((crush_bucket_list*)crush->buckets[i])->item_weights =
+ (__u32*)malloc(crush->buckets[i]->size * sizeof(__u32));
+ ((crush_bucket_list*)crush->buckets[i])->sum_weights =
+ (__u32*)malloc(crush->buckets[i]->size * sizeof(__u32));
+
+ for (unsigned j=0; j<crush->buckets[i]->size; j++) {
+ ::_decode_simple(((crush_bucket_list*)crush->buckets[i])->item_weights[j], blp);
+ ::_decode_simple(((crush_bucket_list*)crush->buckets[i])->sum_weights[j], blp);
}
break;
case CRUSH_BUCKET_TREE:
- ((crush_bucket_tree*)map->buckets[i])->node_weights =
- (__u32*)malloc(map->buckets[i]->size * sizeof(__u32));
- for (unsigned j=0; j<map->buckets[i]->size; j++)
- ::_decode_simple(((crush_bucket_tree*)map->buckets[i])->node_weights[j], blp);
+ ((crush_bucket_tree*)crush->buckets[i])->node_weights =
+ (__u32*)malloc(crush->buckets[i]->size * sizeof(__u32));
+ for (unsigned j=0; j<crush->buckets[i]->size; j++)
+ ::_decode_simple(((crush_bucket_tree*)crush->buckets[i])->node_weights[j], blp);
break;
case CRUSH_BUCKET_STRAW:
- ((crush_bucket_straw*)map->buckets[i])->straws =
- (__u32*)malloc(map->buckets[i]->size * sizeof(__u32));
- for (unsigned j=0; j<map->buckets[i]->size; j++)
- ::_decode_simple(((crush_bucket_straw*)map->buckets[i])->straws[j], blp);
+ ((crush_bucket_straw*)crush->buckets[i])->straws =
+ (__u32*)malloc(crush->buckets[i]->size * sizeof(__u32));
+ for (unsigned j=0; j<crush->buckets[i]->size; j++)
+ ::_decode_simple(((crush_bucket_straw*)crush->buckets[i])->straws[j], blp);
break;
}
}
// rules
- map->rules = (crush_rule**)malloc(sizeof(crush_rule*)*map->max_rules);
- for (unsigned i=0; i<map->max_rules; i++) {
+ crush->rules = (crush_rule**)malloc(sizeof(crush_rule*)*crush->max_rules);
+ for (unsigned i=0; i<crush->max_rules; i++) {
__u32 yes;
::_decode_simple(yes, blp);
if (!yes) {
- map->rules[i] = 0;
+ crush->rules[i] = 0;
continue;
}
__u32 len;
::_decode_simple(len, blp);
- map->rules[i] = (crush_rule*)malloc(crush_rule_size(len));
- map->rules[i]->len = len;
- for (unsigned j=0; j<map->rules[i]->len; j++)
- ::_decode_simple(map->rules[i]->steps[j], blp);
+ crush->rules[i] = (crush_rule*)malloc(crush_rule_size(len));
+ crush->rules[i]->len = len;
+ for (unsigned j=0; j<crush->rules[i]->len; j++)
+ ::_decode_simple(crush->rules[i]->steps[j], blp);
}
finalize();
}
crush_bucket_uniform *domain = crush_make_uniform_bucket(1, j, items, 0x10000);
- ritems[i] = crush_add_bucket(crush.map, (crush_bucket*)domain);
+ ritems[i] = crush_add_bucket(crush.crush, (crush_bucket*)domain);
dout(20) << "added domain bucket i " << ritems[i] << " of size " << j << dendl;
}
// root
crush_bucket_list *root = crush_make_list_bucket(2, ndom, ritems, rweights);
- int rootid = crush_add_bucket(crush.map, (crush_bucket*)root);
+ int rootid = crush_add_bucket(crush.crush, (crush_bucket*)root);
// rules
// replication
crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_FIRSTN, i, 1);
crush_rule_set_step(rule, 2, CRUSH_RULE_CHOOSE_FIRSTN, 1, 0);
crush_rule_set_step(rule, 3, CRUSH_RULE_EMIT, 0, 0);
- crush_add_rule(crush.map, CRUSH_REP_RULE(i), rule);
+ crush_add_rule(crush.crush, CRUSH_REP_RULE(i), rule);
}
// raid
crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_INDEP, i, 1);
crush_rule_set_step(rule, 2, CRUSH_RULE_CHOOSE_INDEP, 1, 0);
crush_rule_set_step(rule, 3, CRUSH_RULE_EMIT, 0, 0);
- crush_add_rule(crush.map, CRUSH_RAID_RULE(i), rule);
+ crush_add_rule(crush.crush, CRUSH_RAID_RULE(i), rule);
} else {
crush_rule *rule = crush_make_rule(3);
crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, rootid, 0);
crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_INDEP, i, 0);
crush_rule_set_step(rule, 2, CRUSH_RULE_EMIT, 0, 0);
- crush_add_rule(crush.map, CRUSH_RAID_RULE(i), rule);
+ crush_add_rule(crush.crush, CRUSH_RAID_RULE(i), rule);
}
}
items[i] = i;
crush_bucket_uniform *b = crush_make_uniform_bucket(1, g_conf.num_osd, items, 0x10000);
- int root = crush_add_bucket(crush.map, (crush_bucket*)b);
+ int root = crush_add_bucket(crush.crush, (crush_bucket*)b);
// rules
// replication
crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, root, 0);
crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_FIRSTN, i, 0);
crush_rule_set_step(rule, 2, CRUSH_RULE_EMIT, 0, 0);
- crush_add_rule(crush.map, CRUSH_REP_RULE(i), rule);
+ crush_add_rule(crush.crush, CRUSH_REP_RULE(i), rule);
}
// raid4
for (int i=g_conf.osd_min_raid_width; i <= g_conf.osd_max_raid_width; i++) {
crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, root, 0);
crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_INDEP, i, 0);
crush_rule_set_step(rule, 2, CRUSH_RULE_EMIT, 0, 0);
- crush_add_rule(crush.map, CRUSH_RAID_RULE(i), rule);
+ crush_add_rule(crush.crush, CRUSH_RAID_RULE(i), rule);
}
}
for (int i=0; i<g_conf.num_osd; i++)
crush.set_offload(i, CEPH_OSD_IN);
- dout(20) << "crush max_devices " << crush.map->max_devices << dendl;
+ dout(20) << "crush max_devices " << crush.crush->max_devices << dendl;
}