int b;
__u32 i;
+ /* Calculate the needed working space while we do other
+ finalization tasks. */
+ map->working_size = sizeof(struct crush_work);
+ /* Space for the array of pointers to per-bucket workspace */
+ map->working_size += map->max_buckets *
+ sizeof(struct crush_work_bucket *);
+
/* calc max_devices */
map->max_devices = 0;
for (b=0; b<map->max_buckets; b++) {
for (i=0; i<map->buckets[b]->size; i++)
if (map->buckets[b]->items[i] >= map->max_devices)
map->max_devices = map->buckets[b]->items[i] + 1;
+
+ switch (map->buckets[b]->alg) {
+ default:
+ /* The base case, permutation variables and
+ the pointer to the permutation array. */
+ map->working_size += sizeof(struct crush_work_bucket);
+ break;
+ }
+ /* Every bucket has a permutation array. */
+ map->working_size += map->buckets[b]->size * sizeof(__u32);
}
}
-
-
/** rules **/
int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno)
if (!bucket->h.items)
goto err;
- bucket->h.perm = malloc(sizeof(__u32)*size);
-
- if (!bucket->h.perm)
- goto err;
for (i=0; i<size; i++)
bucket->h.items[i] = items[i];
return bucket;
err:
- free(bucket->h.perm);
free(bucket->h.items);
free(bucket);
return NULL;
bucket->h.items = malloc(sizeof(__s32)*size);
if (!bucket->h.items)
goto err;
- bucket->h.perm = malloc(sizeof(__u32)*size);
- if (!bucket->h.perm)
- goto err;
bucket->item_weights = malloc(sizeof(__u32)*size);
err:
free(bucket->sum_weights);
free(bucket->item_weights);
- free(bucket->h.perm);
free(bucket->h.items);
free(bucket);
return NULL;
if (size == 0) {
bucket->h.items = NULL;
- bucket->h.perm = NULL;
bucket->h.weight = 0;
bucket->node_weights = NULL;
bucket->num_nodes = 0;
bucket->h.items = malloc(sizeof(__s32)*size);
if (!bucket->h.items)
goto err;
- bucket->h.perm = malloc(sizeof(__u32)*size);
- if (!bucket->h.perm)
- goto err;
/* calc tree depth */
depth = calc_depth(size);
return bucket;
err:
free(bucket->node_weights);
- free(bucket->h.perm);
free(bucket->h.items);
free(bucket);
return NULL;
bucket->h.items = malloc(sizeof(__s32)*size);
if (!bucket->h.items)
goto err;
- bucket->h.perm = malloc(sizeof(__u32)*size);
- if (!bucket->h.perm)
- goto err;
bucket->item_weights = malloc(sizeof(__u32)*size);
if (!bucket->item_weights)
goto err;
err:
free(bucket->straws);
free(bucket->item_weights);
- free(bucket->h.perm);
free(bucket->h.items);
free(bucket);
return NULL;
bucket->h.items = malloc(sizeof(__s32)*size);
if (!bucket->h.items)
goto err;
- bucket->h.perm = malloc(sizeof(__u32)*size);
- if (!bucket->h.perm)
- goto err;
bucket->item_weights = malloc(sizeof(__u32)*size);
if (!bucket->item_weights)
goto err;
return bucket;
err:
free(bucket->item_weights);
- free(bucket->h.perm);
free(bucket->h.items);
free(bucket);
return NULL;
} else {
bucket->h.items = _realloc;
}
- if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
- return -ENOMEM;
- } else {
- bucket->h.perm = _realloc;
- }
bucket->h.items[newsize-1] = item;
} else {
bucket->h.items = _realloc;
}
- if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
- return -ENOMEM;
- } else {
- bucket->h.perm = _realloc;
- }
if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
return -ENOMEM;
} else {
} else {
bucket->h.items = _realloc;
}
- if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
- return -ENOMEM;
- } else {
- bucket->h.perm = _realloc;
- }
if ((_realloc = realloc(bucket->node_weights, sizeof(__u32)*bucket->num_nodes)) == NULL) {
return -ENOMEM;
} else {
bucket->node_weights = _realloc;
}
-
+
node = crush_calc_tree_node(newsize-1);
bucket->node_weights[node] = weight;
int item, int weight)
{
int newsize = bucket->h.size + 1;
-
+
void *_realloc = NULL;
if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
} else {
bucket->h.items = _realloc;
}
- if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
- return -ENOMEM;
- } else {
- bucket->h.perm = _realloc;
- }
if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
return -ENOMEM;
} else {
} else {
bucket->h.items = _realloc;
}
- if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
- return -ENOMEM;
- } else {
- bucket->h.perm = _realloc;
- }
if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
return -ENOMEM;
} else {
int crush_bucket_add_item(struct crush_map *map,
struct crush_bucket *b, int item, int weight)
{
- /* invalidate perm cache */
- b->perm_n = 0;
-
switch (b->alg) {
case CRUSH_BUCKET_UNIFORM:
return crush_add_uniform_bucket_item((struct crush_bucket_uniform *)b, item, weight);
} else {
bucket->h.items = _realloc;
}
- if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
- return -ENOMEM;
- } else {
- bucket->h.perm = _realloc;
- }
return 0;
}
} else {
bucket->h.items = _realloc;
}
- if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
- return -ENOMEM;
- } else {
- bucket->h.perm = _realloc;
- }
if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
return -ENOMEM;
} else {
} else {
bucket->h.items = _realloc;
}
- if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
- return -ENOMEM;
- } else {
- bucket->h.perm = _realloc;
- }
olddepth = calc_depth(bucket->h.size);
newdepth = calc_depth(newsize);
} else {
bucket->h.items = _realloc;
}
- if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
- return -ENOMEM;
- } else {
- bucket->h.perm = _realloc;
- }
if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
return -ENOMEM;
} else {
} else {
bucket->h.items = _realloc;
}
- if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
- return -ENOMEM;
- } else {
- bucket->h.perm = _realloc;
- }
if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
return -ENOMEM;
} else {
int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *b, int item)
{
- /* invalidate perm cache */
- b->perm_n = 0;
-
switch (b->alg) {
case CRUSH_BUCKET_UNIFORM:
return crush_remove_uniform_bucket_item((struct crush_bucket_uniform *)b, item);
return -1;
}
-
/*
* bucket choose methods
*
* Since this is expensive, we optimize for the r=0 case, which
* captures the vast majority of calls.
*/
-static int bucket_perm_choose(struct crush_bucket *bucket,
+/* Pick the pr-th entry of a pseudo-random permutation of @bucket's
+   items for input @x.  All mutable permutation state (perm_x, perm_n
+   and the perm[] array) lives in the caller-supplied @work scratch
+   space, so the shared map itself is never written during a lookup. */
+static int bucket_perm_choose(const struct crush_bucket *bucket,
+			      struct crush_work_bucket *work,
 			      int x, int r)
 {
 	unsigned int pr = r % bucket->size;
 	unsigned int i, s;
 	/* start a new permutation if @x has changed */
-	if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
+	if (work->perm_x != (__u32)x || work->perm_n == 0) {
 		dprintk("bucket %d new x=%d\n", bucket->id, x);
-		bucket->perm_x = x;
+		work->perm_x = x;
 		/* optimize common r=0 case */
 		if (pr == 0) {
 			s = crush_hash32_3(bucket->hash, x, bucket->id, 0) %
 				bucket->size;
-			bucket->perm[0] = s;
-			bucket->perm_n = 0xffff; /* magic value, see below */
+			work->perm[0] = s;
+			work->perm_n = 0xffff; /* magic value, see below */
 			goto out;
 		}
 		for (i = 0; i < bucket->size; i++)
-			bucket->perm[i] = i;
-		bucket->perm_n = 0;
-	} else if (bucket->perm_n == 0xffff) {
+			work->perm[i] = i;
+		work->perm_n = 0;
+	} else if (work->perm_n == 0xffff) {
 		/* clean up after the r=0 case above */
 		for (i = 1; i < bucket->size; i++)
-			bucket->perm[i] = i;
-		bucket->perm[bucket->perm[0]] = 0;
-		bucket->perm_n = 1;
+			work->perm[i] = i;
+		work->perm[work->perm[0]] = 0;
+		work->perm_n = 1;
 	}
 	/* calculate permutation up to pr */
-	for (i = 0; i < bucket->perm_n; i++)
-		dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]);
-	while (bucket->perm_n <= pr) {
-		unsigned int p = bucket->perm_n;
+	for (i = 0; i < work->perm_n; i++)
+		dprintk(" perm_choose have %d: %d\n", i, work->perm[i]);
+	while (work->perm_n <= pr) {
+		unsigned int p = work->perm_n;
 		/* no point in swapping the final entry */
 		if (p < bucket->size - 1) {
 			i = crush_hash32_3(bucket->hash, x, bucket->id, p) %
 				(bucket->size - p);
 			if (i) {
-				unsigned int t = bucket->perm[p + i];
-				bucket->perm[p + i] = bucket->perm[p];
-				bucket->perm[p] = t;
+				unsigned int t = work->perm[p + i];
+				work->perm[p + i] = work->perm[p];
+				work->perm[p] = t;
 			}
 			dprintk(" perm_choose swap %d with %d\n", p, p+i);
 		}
-		bucket->perm_n++;
+		work->perm_n++;
 	}
 	for (i = 0; i < bucket->size; i++)
-		dprintk(" perm_choose %d: %d\n", i, bucket->perm[i]);
-	s = bucket->perm[pr];
+		dprintk(" perm_choose %d: %d\n", i, work->perm[i]);
+	s = work->perm[pr];
 out:
 	dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id,
 		bucket->size, x, r, pr, s);
 }
 /* uniform */
-static int bucket_uniform_choose(struct crush_bucket_uniform *bucket,
-				 int x, int r)
+/* A uniform bucket's item selection is exactly the shared permutation
+   walk, so forward directly to bucket_perm_choose with the
+   caller-supplied per-bucket scratch state @work. */
+static int bucket_uniform_choose(const struct crush_bucket_uniform *bucket,
+				 struct crush_work_bucket *work, int x, int r)
 {
-	return bucket_perm_choose(&bucket->h, x, r);
+	return bucket_perm_choose(&bucket->h, work, x, r);
 }
/* list */
-static int bucket_list_choose(struct crush_bucket_list *bucket,
+static int bucket_list_choose(const struct crush_bucket_list *bucket,
int x, int r)
{
int i;
w *= bucket->sum_weights[i];
w = w >> 16;
/*dprintk(" scaled %llx\n", w);*/
- if (w < bucket->item_weights[i])
+ if (w < bucket->item_weights[i]) {
return bucket->h.items[i];
+ }
}
dprintk("bad list sums for bucket %d\n", bucket->h.id);
return x & 1;
}
-static int bucket_tree_choose(struct crush_bucket_tree *bucket,
+static int bucket_tree_choose(const struct crush_bucket_tree *bucket,
int x, int r)
{
int n;
/* straw */
-static int bucket_straw_choose(struct crush_bucket_straw *bucket,
+static int bucket_straw_choose(const struct crush_bucket_straw *bucket,
int x, int r)
{
__u32 i;
iexpon = 15;
// figure out number of bits we need to shift and
- // do it in one step instead of iteratively
+ // do it in one step instead of iteratively
if (!(x & 0x18000)) {
int bits = __builtin_clz(x & 0x1FFFF) - 16;
x <<= bits;
*
*/
-static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
+static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
int x, int r)
{
unsigned int i, high = 0;
high_draw = draw;
}
}
+
return bucket->h.items[high];
}
-static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
+/* Dispatch to the algorithm-specific choose routine for bucket @in.
+   @work is the per-bucket scratch state; of the algorithms handled
+   here only uniform (via bucket_perm_choose) consumes it.  An
+   unrecognized alg falls back to the bucket's first item. */
+static int crush_bucket_choose(const struct crush_bucket *in,
+			       struct crush_work_bucket *work,
+			       int x, int r)
 {
 	dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
 	BUG_ON(in->size == 0);
 	switch (in->alg) {
 	case CRUSH_BUCKET_UNIFORM:
-		return bucket_uniform_choose((struct crush_bucket_uniform *)in,
-					     x, r);
+		return bucket_uniform_choose(
+			(const struct crush_bucket_uniform *)in,
+			work, x, r);
 	case CRUSH_BUCKET_LIST:
-		return bucket_list_choose((struct crush_bucket_list *)in,
+		return bucket_list_choose((const struct crush_bucket_list *)in,
 					  x, r);
 	case CRUSH_BUCKET_TREE:
-		return bucket_tree_choose((struct crush_bucket_tree *)in,
+		return bucket_tree_choose((const struct crush_bucket_tree *)in,
 					  x, r);
 	case CRUSH_BUCKET_STRAW:
-		return bucket_straw_choose((struct crush_bucket_straw *)in,
-					   x, r);
+		return bucket_straw_choose(
+			(const struct crush_bucket_straw *)in,
+			x, r);
 	case CRUSH_BUCKET_STRAW2:
-		return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
-					    x, r);
+		return bucket_straw2_choose(
+			(const struct crush_bucket_straw2 *)in,
+			x, r);
 	default:
 		dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
 		return in->items[0];
 	}
 }
-
/*
* true if device is marked "out" (failed, fully offloaded)
* of the cluster
* @parent_r: r value passed from the parent
*/
static int crush_choose_firstn(const struct crush_map *map,
- struct crush_bucket *bucket,
+ struct crush_work *work,
+ const struct crush_bucket *bucket,
const __u32 *weight, int weight_max,
int x, int numrep, int type,
int *out, int outpos,
int rep;
unsigned int ftotal, flocal;
int retry_descent, retry_bucket, skip_rep;
- struct crush_bucket *in = bucket;
+ const struct crush_bucket *in = bucket;
int r;
int i;
int item = 0;
skip_rep = 0;
do {
retry_descent = 0;
- in = bucket; /* initial bucket */
+ in = bucket; /* initial bucket */
/* choose through intervening buckets */
flocal = 0;
if (local_fallback_retries > 0 &&
flocal >= (in->size>>1) &&
flocal > local_fallback_retries)
- item = bucket_perm_choose(in, x, r);
+ item = bucket_perm_choose(
+ in, work->work[-1-in->id],
+ x, r);
else
- item = crush_bucket_choose(in, x, r);
+ item = crush_bucket_choose(
+ in, work->work[-1-in->id],
+ x, r);
if (item >= map->max_devices) {
dprintk(" bad item %d\n", item);
skip_rep = 1;
sub_r = r >> (vary_r-1);
else
sub_r = 0;
- if (crush_choose_firstn(map,
- map->buckets[-1-item],
- weight, weight_max,
- x, stable ? 1 : outpos+1, 0,
- out2, outpos, count,
- recurse_tries, 0,
- local_retries,
- local_fallback_retries,
- 0,
- vary_r,
- stable,
- NULL,
- sub_r) <= outpos)
+ if (crush_choose_firstn(
+ map,
+ work,
+ map->buckets[-1-item],
+ weight, weight_max,
+ x, stable ? 1 : outpos+1, 0,
+ out2, outpos, count,
+ recurse_tries, 0,
+ local_retries,
+ local_fallback_retries,
+ 0,
+ vary_r,
+ stable,
+ NULL,
+ sub_r) <= outpos)
/* didn't get leaf */
reject = 1;
} else {
/* we already have a leaf! */
out2[outpos] = item;
- }
+ }
}
if (!reject) {
*
*/
static void crush_choose_indep(const struct crush_map *map,
- struct crush_bucket *bucket,
+ struct crush_work *work,
+ const struct crush_bucket *bucket,
const __u32 *weight, int weight_max,
int x, int left, int numrep, int type,
int *out, int outpos,
int *out2,
int parent_r)
{
- struct crush_bucket *in = bucket;
+ const struct crush_bucket *in = bucket;
int endpos = outpos + left;
int rep;
unsigned int ftotal;
break;
}
- item = crush_bucket_choose(in, x, r);
+ item = crush_bucket_choose(
+ in, work->work[-1-in->id],
+ x, r);
if (item >= map->max_devices) {
dprintk(" bad item %d\n", item);
out[rep] = CRUSH_ITEM_NONE;
if (recurse_to_leaf) {
if (item < 0) {
- crush_choose_indep(map,
- map->buckets[-1-item],
- weight, weight_max,
- x, 1, numrep, 0,
- out2, rep,
- recurse_tries, 0,
- 0, NULL, r);
+ crush_choose_indep(
+ map,
+ work,
+ map->buckets[-1-item],
+ weight, weight_max,
+ x, 1, numrep, 0,
+ out2, rep,
+ recurse_tries, 0,
+ 0, NULL, r);
if (out2[rep] == CRUSH_ITEM_NONE) {
/* placed nothing; no leaf */
break;
#endif
}
+
+/* This takes a chunk of memory and sets it up to be a shiny new
+   working area for a CRUSH placement computation. It must be called
+   on any newly allocated memory before passing it in to
+   crush_do_rule. It may be used repeatedly after that, so long as the
+   map has not changed. If the map /has/ changed, you must make sure
+   the working size is no smaller than what was allocated and re-run
+   crush_init_workspace.
+
+   If you do retain the working space between calls to crush, make it
+   thread-local. If you reinstitute the locking I've spent so much
+   time getting rid of, I will be very unhappy with you. */
+
+void crush_init_workspace(const struct crush_map *m, void *v) {
+	/* We work by moving through the available space and setting
+	   values and pointers as we go.
+
+	   It's a bit like Forth's use of the 'allot' word since we
+	   set the pointer first and then reserve the space for it to
+	   point to by incrementing the pointer. */
+	struct crush_work *w = (struct crush_work *)v;
+	char *point = (char *)v;
+	__s32 b;
+	/* Reserve the header itself.  This must be sizeof the struct,
+	   not sizeof a pointer to it, so that the layout matches the
+	   accounting done for m->working_size (which starts from
+	   sizeof(struct crush_work)); otherwise the BUG_ON below can
+	   fire whenever the two sizes differ. */
+	point += sizeof(struct crush_work);
+	w->work = (struct crush_work_bucket **)point;
+	point += m->max_buckets * sizeof(struct crush_work_bucket *);
+	for (b = 0; b < m->max_buckets; ++b) {
+		if (m->buckets[b] == 0)
+			continue;
+
+		w->work[b] = (struct crush_work_bucket *) point;
+		switch (m->buckets[b]->alg) {
+		default:
+			/* Base case: permutation bookkeeping only. */
+			point += sizeof(struct crush_work_bucket);
+			break;
+		}
+		/* Every bucket gets a permutation array after its
+		   per-bucket header. */
+		w->work[b]->perm_x = 0;
+		w->work[b]->perm_n = 0;
+		w->work[b]->perm = (__u32 *)point;
+		point += m->buckets[b]->size * sizeof(__u32);
+	}
+	/* We must have consumed exactly the space crush_finalize
+	   accounted for. */
+	BUG_ON((char *)point - (char *)w != m->working_size);
+}
+
/**
* crush_do_rule - calculate a mapping with the given input and rule
* @map: the crush_map
* @result_max: maximum result size
* @weight: weight vector (for map leaves)
* @weight_max: size of weight vector
+ * @cwin: Pointer to at least map->working_size bytes of memory or NULL.
* @scratch: scratch vector for private use; must be >= 3 * result_max
*/
int crush_do_rule(const struct crush_map *map,
int ruleno, int x, int *result, int result_max,
const __u32 *weight, int weight_max,
- int *scratch)
+ void *cwin, int *scratch)
{
int result_len;
+ struct crush_work *cw = cwin;
int *a = scratch;
int *b = scratch + result_max;
int *c = scratch + result_max*2;
int *o;
int osize;
int *tmp;
- struct crush_rule *rule;
+ const struct crush_rule *rule;
__u32 step;
int i, j;
int numrep;
w = a;
o = b;
+
for (step = 0; step < rule->len; step++) {
int firstn = 0;
- struct crush_rule_step *curstep = &rule->steps[step];
+ const struct crush_rule_step *curstep = &rule->steps[step];
switch (curstep->op) {
case CRUSH_RULE_TAKE:
recurse_tries = choose_tries;
osize += crush_choose_firstn(
map,
+ cw,
map->buckets[bno],
weight, weight_max,
x, numrep,
numrep : (result_max-osize));
crush_choose_indep(
map,
+ cw,
map->buckets[bno],
weight, weight_max,
x, out_size, numrep,
break;
}
}
+
return result_len;
}