From 5c9a5c57517b3a345bccfcb9e0e8fc7113611390 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 13 Mar 2008 21:44:44 -0700 Subject: [PATCH] crush: use rule masks instead of hard coded rule numbers --- src/crush/CrushWrapper.h | 13 ++++--- src/crush/builder.c | 14 ++++---- src/crush/builder.h | 6 ++-- src/crush/crush.h | 15 ++++++++ src/crush/mapper.c | 32 ++++++++++++++--- src/crush/mapper.h | 1 + src/include/ceph_fs.h | 9 ----- src/kernel/osd_client.c | 17 +++------ src/kernel/osdmap.c | 3 ++ src/osd/OSDMap.cc | 78 +++++++++++++++++----------------------- src/osd/OSDMap.h | 11 ++---- 11 files changed, 107 insertions(+), 92 deletions(-) diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 1cdf8d10f4f2b..aa42dafe0d7e2 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -161,11 +161,11 @@ public: } /* modifiers */ - int add_rule(unsigned ruleno, int len) { + int add_rule(int len, int pool, int type, int minsize, int maxsize, int ruleno) { if (!crush) return -ENOENT; - crush_rule *n = crush_make_rule(len); - crush_add_rule(crush, ruleno, n); - return 0; + crush_rule *n = crush_make_rule(len, pool, type, minsize, maxsize); + ruleno = crush_add_rule(crush, n, ruleno); + return ruleno; } int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) { if (!crush) return -ENOENT; @@ -256,6 +256,9 @@ public: return crush->device_offload[i]; } + int find_rule(int pool, int type, int size) { + return crush_find_rule(crush, pool, type, size); + } void do_rule(int rule, int x, vector& out, int maxout, int forcefeed) { int rawout[maxout]; @@ -322,6 +325,7 @@ public: if (!yes) continue; ::_encode_simple(crush->rules[i]->len, bl); + ::_encode_simple(crush->rules[i]->mask, bl); for (unsigned j=0; jrules[i]->len; j++) ::_encode_simple(crush->rules[i]->steps[j], bl); } @@ -432,6 +436,7 @@ public: ::_decode_simple(len, blp); crush->rules[i] = (crush_rule*)malloc(crush_rule_size(len)); crush->rules[i]->len = len; + ::_decode_simple(crush->rules[i]->mask, blp); for (unsigned j=0; jrules[i]->len; j++) ::_decode_simple(crush->rules[i]->steps[j], blp); } diff --git a/src/crush/builder.c b/src/crush/builder.c index 5fd743afe5c68..c1a8e23cd0e6f 100644 --- a/src/crush/builder.c +++ b/src/crush/builder.c @@ -52,16 +52,14 @@ void crush_finalize(struct crush_map *map) /** rules **/ -int crush_add_rule(struct crush_map *map, - int ruleno, - struct crush_rule *rule) +int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno) { int oldsize; - if (ruleno < 0) { + if (ruleno < 0) for (ruleno=0; ruleno < map->max_rules; ruleno++) if (map->rules[ruleno] == 0) break; - } + if (ruleno >= map->max_rules) { /* expand array */ oldsize = map->max_rules; @@ -75,11 +73,15 @@ int crush_add_rule(struct crush_map *map, return ruleno; } -struct crush_rule *crush_make_rule(int len) +struct crush_rule *crush_make_rule(int len, int pool, int type, int minsize, int maxsize) { struct crush_rule *rule; rule = malloc(crush_rule_size(len)); rule->len = len; + rule->mask.pool = pool; + rule->mask.type = type; + rule->mask.min_size = minsize; + rule->mask.max_size = maxsize; return rule; } diff --git a/src/crush/builder.h b/src/crush/builder.h index 6e0b0dd6c2d20..f5ef8769c98ef 100644 --- a/src/crush/builder.h +++ b/src/crush/builder.h @@ -11,11 +11,9 @@ extern struct crush_map *crush_create(); extern void crush_finalize(struct crush_map *map); /* rules */ -extern struct crush_rule *crush_make_rule(int len); +extern struct crush_rule *crush_make_rule(int len, int pool, int type, int minsize, int maxsize); extern void crush_rule_set_step(struct crush_rule *rule, int pos, int op, int arg1, int arg2); -extern int crush_add_rule(struct crush_map *map, - int ruleno, - struct crush_rule *rule); +extern int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno); /* buckets */ extern int crush_get_next_bucket_id(struct crush_map *map); diff --git a/src/crush/crush.h b/src/crush/crush.h index 61557ed1943f6..a443b95255fa9 100644 --- a/src/crush/crush.h +++ b/src/crush/crush.h @@ -28,14 +28,29 @@ enum { #define CRUSH_MAX_DEPTH 10 #define CRUSH_MAX_SET 10 +/* + * for specifying choose numrep relative to the max + * parameter passed to do_rule + */ +#define CRUSH_CHOOSE_N 0 +#define CRUSH_CHOOSE_N_MINUS(x) (-(x)) + struct crush_rule_step { __u32 op; __s32 arg1; __s32 arg2; }; +struct crush_rule_mask { + __u8 pool; + __u8 type; + __u8 min_size; + __u8 max_size; +}; + struct crush_rule { __u32 len; + struct crush_rule_mask mask; struct crush_rule_step steps[0]; }; diff --git a/src/crush/mapper.c b/src/crush/mapper.c index 52a2a8aaa3594..665b1371acc57 100644 --- a/src/crush/mapper.c +++ b/src/crush/mapper.c @@ -11,6 +11,22 @@ # include #endif + +int crush_find_rule(struct crush_map *map, int pool, int type, int size) +{ + int i; + for (i = 0; i < map->max_rules; i++) { + if (map->rules[i] && + map->rules[i]->mask.pool == pool && + map->rules[i]->mask.type == type && + map->rules[i]->mask.min_size <= size && + map->rules[i]->mask.max_size >= size) + return i; + } + return -1; +} + + /** bucket choose methods **/ /* uniform */ @@ -260,8 +276,7 @@ static int crush_choose(struct crush_map *map, int crush_do_rule(struct crush_map *map, - int ruleno, - int x, int *result, int result_max, + int ruleno, int x, int *result, int result_max, int forcefeed) /* -1 for none */ { int result_len; @@ -324,7 +339,17 @@ int crush_do_rule(struct crush_map *map, osize = 0; for (i = 0; i < wsize; i++) { + /* + * see CRUSH_N, CRUSH_N_MINUS macros. + * basically, numrep <= 0 means relative to + * the provided result_max + */ numrep = rule->steps[step].arg1; + if (numrep <= 0) { + numrep += result_max; + if (numrep <= 0) + continue; + } j = 0; if (osize == 0 && force_pos >= 0) { o[osize] = force_stack[force_pos]; @@ -346,10 +371,9 @@ int crush_do_rule(struct crush_map *map, case CRUSH_RULE_EMIT: - for (i=0; ir_pgid.pg.type) { - case CEPH_PG_TYPE_REP: - rule = CRUSH_REP_RULE(req->r_pgid.pg.size, req->r_pgid.pg.pool); - break; - default: - BUG_ON(1); - - return; /* remove compilation warning */ - } - nr_osds = crush_do_rule(osdc->osdmap->crush, rule, + ruleno = crush_find_rule(osdc->osdmap->crush, req->r_pgid.pg.pool, + req->r_pgid.pg.type, req->r_pgid.pg.size); + BUG_ON(ruleno < 0); /* fixme, need some proper error handling here */ + nr_osds = crush_do_rule(osdc->osdmap->crush, ruleno, req->r_pgid.pg.ps, osds, 10, req->r_pgid.pg.preferred); for (i=0; ilen = yes; + if ((err = ceph_decode_copy(p, end, &r->mask, 4)) < 0) /* 4 u8's */ + goto bad; for (j=0; jlen; j++) { if ((err = ceph_decode_32(p, end, &r->steps[j].op)) < 0) goto bad; diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index a7c3da120abb1..d805dc4be6a44 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -57,8 +57,8 @@ void OSDMap::build_simple_crush_map(CrushWrapper& crush, int num_osd, map= 12) { - int ndom = g_conf.osd_max_rep; + int ndom = MAX(g_conf.osd_max_rep, g_conf.osd_max_raid_width); + if (num_osd >= ndom*2) { int ritems[ndom]; int rweights[ndom]; @@ -91,34 +91,25 @@ void OSDMap::build_simple_crush_map(CrushWrapper& crush, int num_osd, map= i) { - crush_rule *rule = crush_make_rule(4); - crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, rootid, 0); - crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_INDEP, i, 1); - crush_rule_set_step(rule, 2, CRUSH_RULE_CHOOSE_INDEP, 1, 0); - crush_rule_set_step(rule, 3, CRUSH_RULE_EMIT, 0, 0); - crush_add_rule(crush.crush, CRUSH_RAID_RULE(i, pool), rule); - } else { - crush_rule *rule = crush_make_rule(3); - crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, rootid, 0); - crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_INDEP, i, 0); - crush_rule_set_step(rule, 2, CRUSH_RULE_EMIT, 0, 0); - crush_add_rule(crush.crush, CRUSH_RAID_RULE(i, pool), rule); - } - } + for (int pool=0; pool<1; pool++) { + crush_rule *rule = crush_make_rule(4, pool, CEPH_PG_TYPE_RAID4, g_conf.osd_min_raid_width, g_conf.osd_max_raid_width); + crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, rootid, 0); + crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_INDEP, CRUSH_CHOOSE_N, 1); + crush_rule_set_step(rule, 2, CRUSH_RULE_CHOOSE_INDEP, 1, 0); + crush_rule_set_step(rule, 3, CRUSH_RULE_EMIT, 0, 0); + crush_add_rule(crush.crush, rule, -1); + } } else { // one bucket @@ -130,26 +121,23 @@ void OSDMap::build_simple_crush_map(CrushWrapper& crush, int num_osd, map= 0) + crush.do_rule(ruleno, pg.ps(), osds, pg.size(), pg.preferred()); } break; -- 2.39.5