git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crush: Remove mutable part of CRUSH map
author Adam C. Emerson <aemerson@redhat.com>
Tue, 28 Jun 2016 21:55:39 +0000 (17:55 -0400)
committer Adam C. Emerson <aemerson@redhat.com>
Wed, 9 Nov 2016 18:54:37 +0000 (13:54 -0500)
Then add it to the working state. It would be very nice if we didn't
have to take a lock to calculate a crush placement. By moving the
permutation array into the working data, we can treat the CRUSH map as
immutable.

Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
src/crush/CrushWrapper.cc
src/crush/CrushWrapper.h
src/crush/builder.c
src/crush/crush.c
src/crush/crush.h
src/crush/mapper.c
src/crush/mapper.h
src/test/crush/crush.cc
src/test/erasure-code/TestErasureCodeIsa.cc
src/test/erasure-code/TestErasureCodeJerasure.cc
src/test/erasure-code/TestErasureCodeLrc.cc

index 490c79fc97a1c48d99227e56eca553ba8c2dfd84..afc91f5d301c55ef2278aca21560d36f119a0580 100644 (file)
@@ -1365,9 +1365,6 @@ void CrushWrapper::decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator
     ::decode(bucket->items[j], blp);
   }
 
-  bucket->perm = (__u32*)calloc(1, bucket->size * sizeof(__u32));
-  bucket->perm_n = 0;
-
   switch (bucket->alg) {
   case CRUSH_BUCKET_UNIFORM:
     ::decode((reinterpret_cast<crush_bucket_uniform*>(bucket))->item_weight, blp);
index 077417d0ee4c7741782edb7c0d791e68cc62f72b..36013321aa25252a94c6c435daf4705633524c91 100644 (file)
@@ -1091,23 +1091,25 @@ public:
     Mutex::Locker l(mapper_lock);
     int rawout[maxout];
     int scratch[maxout * 3];
-    int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0], weight.size(), scratch);
+    char work[crush->working_size];
+    crush_init_workspace(crush, work);
+    int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0],
+                              weight.size(), work, scratch);
     if (numrep < 0)
       numrep = 0;
     out.resize(numrep);
     for (int i=0; i<numrep; i++)
       out[i] = rawout[i];
   }
-  
+
   bool check_crush_rule(int ruleset, int type, int size,  ostream& ss) {
-   
-    assert(crush);    
+    assert(crush);
 
     __u32 i;
     for (i = 0; i < crush->max_rules; i++) {
       if (crush->rules[i] &&
-          crush->rules[i]->mask.ruleset == ruleset &&
-          crush->rules[i]->mask.type == type) {
+         crush->rules[i]->mask.ruleset == ruleset &&
+         crush->rules[i]->mask.type == type) {
 
         if (crush->rules[i]->mask.min_size <= size &&
             crush->rules[i]->mask.max_size >= size) {
index 9331f6d8bada9a369bb57b6dbdf43e5b41380316..71a6264e76a913f9abd2300f2abb7d10bafec5d8 100644 (file)
@@ -45,6 +45,13 @@ void crush_finalize(struct crush_map *map)
        int b;
        __u32 i;
 
+       /* Calculate the needed working space while we do other
+          finalization tasks. */
+       map->working_size = sizeof(struct crush_work);
+       /* Space for the array of pointers to per-bucket workspace */
+       map->working_size += map->max_buckets *
+               sizeof(struct crush_work_bucket *);
+
        /* calc max_devices */
        map->max_devices = 0;
        for (b=0; b<map->max_buckets; b++) {
@@ -53,13 +60,21 @@ void crush_finalize(struct crush_map *map)
                for (i=0; i<map->buckets[b]->size; i++)
                        if (map->buckets[b]->items[i] >= map->max_devices)
                                map->max_devices = map->buckets[b]->items[i] + 1;
+
+               switch (map->buckets[b]->alg) {
+               default:
+                       /* The base case, permutation variables and
+                          the pointer to the permutation array. */
+                       map->working_size += sizeof(struct crush_work_bucket);
+                       break;
+               }
+               /* Every bucket has a permutation array. */
+               map->working_size += map->buckets[b]->size * sizeof(__u32);
        }
 }
 
 
 
-
-
 /** rules **/
 
 int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno)
@@ -212,16 +227,11 @@ crush_make_uniform_bucket(int hash, int type, int size,
         if (!bucket->h.items)
                 goto err;
 
-        bucket->h.perm = malloc(sizeof(__u32)*size);
-
-        if (!bucket->h.perm)
-                goto err;
        for (i=0; i<size; i++)
                bucket->h.items[i] = items[i];
 
        return bucket;
 err:
-        free(bucket->h.perm);
         free(bucket->h.items);
         free(bucket);
         return NULL;
@@ -251,9 +261,6 @@ crush_make_list_bucket(int hash, int type, int size,
        bucket->h.items = malloc(sizeof(__s32)*size);
         if (!bucket->h.items)
                 goto err;
-       bucket->h.perm = malloc(sizeof(__u32)*size);
-        if (!bucket->h.perm)
-                goto err;
 
 
         bucket->item_weights = malloc(sizeof(__u32)*size);
@@ -282,7 +289,6 @@ crush_make_list_bucket(int hash, int type, int size,
 err:
         free(bucket->sum_weights);
         free(bucket->item_weights);
-        free(bucket->h.perm);
         free(bucket->h.items);
         free(bucket);
         return NULL;
@@ -347,7 +353,6 @@ crush_make_tree_bucket(int hash, int type, int size,
 
        if (size == 0) {
                bucket->h.items = NULL;
-               bucket->h.perm = NULL;
                bucket->h.weight = 0;
                bucket->node_weights = NULL;
                bucket->num_nodes = 0;
@@ -358,9 +363,6 @@ crush_make_tree_bucket(int hash, int type, int size,
        bucket->h.items = malloc(sizeof(__s32)*size);
         if (!bucket->h.items)
                 goto err;
-       bucket->h.perm = malloc(sizeof(__u32)*size);
-        if (!bucket->h.perm)
-                goto err;
 
        /* calc tree depth */
        depth = calc_depth(size);
@@ -399,7 +401,6 @@ crush_make_tree_bucket(int hash, int type, int size,
        return bucket;
 err:
         free(bucket->node_weights);
-        free(bucket->h.perm);
         free(bucket->h.items);
         free(bucket);
         return NULL;
@@ -577,9 +578,6 @@ crush_make_straw_bucket(struct crush_map *map,
         bucket->h.items = malloc(sizeof(__s32)*size);
         if (!bucket->h.items)
                 goto err;
-       bucket->h.perm = malloc(sizeof(__u32)*size);
-        if (!bucket->h.perm)
-                goto err;
        bucket->item_weights = malloc(sizeof(__u32)*size);
         if (!bucket->item_weights)
                 goto err;
@@ -601,7 +599,6 @@ crush_make_straw_bucket(struct crush_map *map,
 err:
         free(bucket->straws);
         free(bucket->item_weights);
-        free(bucket->h.perm);
         free(bucket->h.items);
         free(bucket);
         return NULL;
@@ -630,9 +627,6 @@ crush_make_straw2_bucket(struct crush_map *map,
         bucket->h.items = malloc(sizeof(__s32)*size);
         if (!bucket->h.items)
                 goto err;
-       bucket->h.perm = malloc(sizeof(__u32)*size);
-        if (!bucket->h.perm)
-                goto err;
        bucket->item_weights = malloc(sizeof(__u32)*size);
         if (!bucket->item_weights)
                 goto err;
@@ -647,7 +641,6 @@ crush_make_straw2_bucket(struct crush_map *map,
        return bucket;
 err:
         free(bucket->item_weights);
-        free(bucket->h.perm);
         free(bucket->h.items);
         free(bucket);
         return NULL;
@@ -698,11 +691,6 @@ int crush_add_uniform_bucket_item(struct crush_bucket_uniform *bucket, int item,
        } else {
                bucket->h.items = _realloc;
        }
-       if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
-               return -ENOMEM;
-       } else {
-               bucket->h.perm = _realloc;
-       }
 
        bucket->h.items[newsize-1] = item;
 
@@ -725,11 +713,6 @@ int crush_add_list_bucket_item(struct crush_bucket_list *bucket, int item, int w
        } else {
                bucket->h.items = _realloc;
        }
-       if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
-               return -ENOMEM;
-       } else {
-               bucket->h.perm = _realloc;
-       }
        if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
                return -ENOMEM;
        } else {
@@ -775,17 +758,12 @@ int crush_add_tree_bucket_item(struct crush_bucket_tree *bucket, int item, int w
        } else {
                bucket->h.items = _realloc;
        }
-       if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
-               return -ENOMEM;
-       } else {
-               bucket->h.perm = _realloc;
-       }
        if ((_realloc = realloc(bucket->node_weights, sizeof(__u32)*bucket->num_nodes)) == NULL) {
                return -ENOMEM;
        } else {
                bucket->node_weights = _realloc;
        }
-       
+
        node = crush_calc_tree_node(newsize-1);
        bucket->node_weights[node] = weight;
 
@@ -824,7 +802,7 @@ int crush_add_straw_bucket_item(struct crush_map *map,
                                int item, int weight)
 {
        int newsize = bucket->h.size + 1;
-       
+
        void *_realloc = NULL;
 
        if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
@@ -832,11 +810,6 @@ int crush_add_straw_bucket_item(struct crush_map *map,
        } else {
                bucket->h.items = _realloc;
        }
-       if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
-               return -ENOMEM;
-       } else {
-               bucket->h.perm = _realloc;
-       }
        if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
                return -ENOMEM;
        } else {
@@ -873,11 +846,6 @@ int crush_add_straw2_bucket_item(struct crush_map *map,
        } else {
                bucket->h.items = _realloc;
        }
-       if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
-               return -ENOMEM;
-       } else {
-               bucket->h.perm = _realloc;
-       }
        if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
                return -ENOMEM;
        } else {
@@ -899,9 +867,6 @@ int crush_add_straw2_bucket_item(struct crush_map *map,
 int crush_bucket_add_item(struct crush_map *map,
                          struct crush_bucket *b, int item, int weight)
 {
-       /* invalidate perm cache */
-       b->perm_n = 0;
-
        switch (b->alg) {
        case CRUSH_BUCKET_UNIFORM:
                return crush_add_uniform_bucket_item((struct crush_bucket_uniform *)b, item, weight);
@@ -945,11 +910,6 @@ int crush_remove_uniform_bucket_item(struct crush_bucket_uniform *bucket, int it
        } else {
                bucket->h.items = _realloc;
        }
-       if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
-               return -ENOMEM;
-       } else {
-               bucket->h.perm = _realloc;
-       }
        return 0;
 }
 
@@ -984,11 +944,6 @@ int crush_remove_list_bucket_item(struct crush_bucket_list *bucket, int item)
        } else {
                bucket->h.items = _realloc;
        }
-       if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
-               return -ENOMEM;
-       } else {
-               bucket->h.perm = _realloc;
-       }
        if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
                return -ENOMEM;
        } else {
@@ -1053,11 +1008,6 @@ int crush_remove_tree_bucket_item(struct crush_bucket_tree *bucket, int item)
                } else {
                        bucket->h.items = _realloc;
                }
-               if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
-                       return -ENOMEM;
-               } else {
-                       bucket->h.perm = _realloc;
-               }
 
                olddepth = calc_depth(bucket->h.size);
                newdepth = calc_depth(newsize);
@@ -1106,11 +1056,6 @@ int crush_remove_straw_bucket_item(struct crush_map *map,
        } else {
                bucket->h.items = _realloc;
        }
-       if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
-               return -ENOMEM;
-       } else {
-               bucket->h.perm = _realloc;
-       }
        if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
                return -ENOMEM;
        } else {
@@ -1155,11 +1100,6 @@ int crush_remove_straw2_bucket_item(struct crush_map *map,
        } else {
                bucket->h.items = _realloc;
        }
-       if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
-               return -ENOMEM;
-       } else {
-               bucket->h.perm = _realloc;
-       }
        if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
                return -ENOMEM;
        } else {
@@ -1171,9 +1111,6 @@ int crush_remove_straw2_bucket_item(struct crush_map *map,
 
 int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *b, int item)
 {
-       /* invalidate perm cache */
-       b->perm_n = 0;
-
        switch (b->alg) {
        case CRUSH_BUCKET_UNIFORM:
                return crush_remove_uniform_bucket_item((struct crush_bucket_uniform *)b, item);
index 80d7c3a97cb84355e82e9d8f4c83fbf5b0d82893..5bf94c04f64547e2cfff79c0655bcc68944f4e12 100644 (file)
@@ -45,7 +45,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
 
 void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
 {
-       kfree(b->h.perm);
        kfree(b->h.items);
        kfree(b);
 }
@@ -54,14 +53,12 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
 {
        kfree(b->item_weights);
        kfree(b->sum_weights);
-       kfree(b->h.perm);
        kfree(b->h.items);
        kfree(b);
 }
 
 void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
 {
-       kfree(b->h.perm);
        kfree(b->h.items);
        kfree(b->node_weights);
        kfree(b);
@@ -71,7 +68,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
 {
        kfree(b->straws);
        kfree(b->item_weights);
-       kfree(b->h.perm);
        kfree(b->h.items);
        kfree(b);
 }
@@ -79,7 +75,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
 void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
 {
        kfree(b->item_weights);
-       kfree(b->h.perm);
        kfree(b->h.items);
        kfree(b);
 }
index be8f12b8f1950499380c10de27ab6928df25fd8e..d2c235af690fbfa11c0cf10da46cd7dfc283747a 100644 (file)
@@ -135,13 +135,6 @@ struct crush_bucket {
        __u32 size;      /* num items */
        __s32 *items;
 
-       /*
-        * cached random permutation: used for uniform bucket and for
-        * the linear search fallback for the other bucket types.
-        */
-       __u32 perm_x;  /* @x for which *perm is defined */
-       __u32 perm_n;  /* num elements of *perm that are permuted/defined */
-       __u32 *perm;
 };
 
 struct crush_bucket_uniform {
@@ -211,6 +204,19 @@ struct crush_map {
         * device fails. */
        __u8 chooseleaf_stable;
 
+       /* This value is calculated after decode or construction by
+          the builder. It is exposed here (rather than having a
+          'build CRUSH working space' function) so that callers can
+          reserve a static buffer, allocate space on the stack, or
+          otherwise avoid calling into the heap allocator if they
+          want to. The size of the working space depends on the map,
+          while the size of the scratch vector passed to the mapper
+          depends on the size of the desired result set.
+
+          Nothing stops the caller from allocating both in one fell
+          swoop and passing in two pointers, though. */
+       size_t working_size;
+
 #ifndef __KERNEL__
        /*
         * version 0 (original) of straw_calc has various flaws.  version 1
@@ -248,4 +254,26 @@ static inline int crush_calc_tree_node(int i)
        return ((i+1) << 1)-1;
 }
 
+/* ---------------------------------------------------------------------
+                              Private
+   --------------------------------------------------------------------- */
+
+/* These data structures are private to the CRUSH implementation. They
+   are exposed in this header file because builder needs their
+   definitions to calculate the total working size.
+
+   Moving this out of the crush map allows us to treat the CRUSH map as
+   immutable within the mapper and removes the requirement for a CRUSH
+   map lock. */
+
+struct crush_work_bucket {
+       __u32 perm_x; /* @x for which *perm is currently defined */
+       __u32 perm_n; /* num elements of *perm that are permuted/defined (0xffff: only perm[0] is set, r=0 shortcut) */
+       __u32 *perm;  /* Permutation of the bucket's item positions (0..size-1) */
+};
+
+struct crush_work {
+       struct crush_work_bucket **work; /* Per-bucket working store, indexed by -1 - bucket id */
+};
+
 #endif
index d565a67b149b79f18d7c46ce428a778ed2cb8489..4d37e0e9829f6528ec4d3296d397a49a867be225 100644 (file)
@@ -52,7 +52,6 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size
        return -1;
 }
 
-
 /*
  * bucket choose methods
  *
@@ -70,59 +69,60 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size
  * Since this is expensive, we optimize for the r=0 case, which
  * captures the vast majority of calls.
  */
-static int bucket_perm_choose(struct crush_bucket *bucket,
+static int bucket_perm_choose(const struct crush_bucket *bucket,
+                             struct crush_work_bucket *work,
                              int x, int r)
 {
        unsigned int pr = r % bucket->size;
        unsigned int i, s;
 
        /* start a new permutation if @x has changed */
-       if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
+       if (work->perm_x != (__u32)x || work->perm_n == 0) {
                dprintk("bucket %d new x=%d\n", bucket->id, x);
-               bucket->perm_x = x;
+               work->perm_x = x;
 
                /* optimize common r=0 case */
                if (pr == 0) {
                        s = crush_hash32_3(bucket->hash, x, bucket->id, 0) %
                                bucket->size;
-                       bucket->perm[0] = s;
-                       bucket->perm_n = 0xffff;   /* magic value, see below */
+                       work->perm[0] = s;
+                       work->perm_n = 0xffff;   /* magic value, see below */
                        goto out;
                }
 
                for (i = 0; i < bucket->size; i++)
-                       bucket->perm[i] = i;
-               bucket->perm_n = 0;
-       } else if (bucket->perm_n == 0xffff) {
+                       work->perm[i] = i;
+               work->perm_n = 0;
+       } else if (work->perm_n == 0xffff) {
                /* clean up after the r=0 case above */
                for (i = 1; i < bucket->size; i++)
-                       bucket->perm[i] = i;
-               bucket->perm[bucket->perm[0]] = 0;
-               bucket->perm_n = 1;
+                       work->perm[i] = i;
+               work->perm[work->perm[0]] = 0;
+               work->perm_n = 1;
        }
 
        /* calculate permutation up to pr */
-       for (i = 0; i < bucket->perm_n; i++)
+       for (i = 0; i < work->perm_n; i++)
                dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]);
-       while (bucket->perm_n <= pr) {
-               unsigned int p = bucket->perm_n;
+       while (work->perm_n <= pr) {
+               unsigned int p = work->perm_n;
                /* no point in swapping the final entry */
                if (p < bucket->size - 1) {
                        i = crush_hash32_3(bucket->hash, x, bucket->id, p) %
                                (bucket->size - p);
                        if (i) {
-                               unsigned int t = bucket->perm[p + i];
-                               bucket->perm[p + i] = bucket->perm[p];
-                               bucket->perm[p] = t;
+                               unsigned int t = work->perm[p + i];
+                               work->perm[p + i] = work->perm[p];
+                               work->perm[p] = t;
                        }
                        dprintk(" perm_choose swap %d with %d\n", p, p+i);
                }
-               bucket->perm_n++;
+               work->perm_n++;
        }
        for (i = 0; i < bucket->size; i++)
                dprintk(" perm_choose  %d: %d\n", i, bucket->perm[i]);
 
-       s = bucket->perm[pr];
+       s = work->perm[pr];
 out:
        dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id,
                bucket->size, x, r, pr, s);
@@ -130,14 +130,14 @@ out:
 }
 
 /* uniform */
-static int bucket_uniform_choose(struct crush_bucket_uniform *bucket,
-                                int x, int r)
+static int bucket_uniform_choose(const struct crush_bucket_uniform *bucket,
+                                struct crush_work_bucket *work, int x, int r)
 {
-       return bucket_perm_choose(&bucket->h, x, r);
+       return bucket_perm_choose(&bucket->h, work, x, r);
 }
 
 /* list */
-static int bucket_list_choose(struct crush_bucket_list *bucket,
+static int bucket_list_choose(const struct crush_bucket_list *bucket,
                              int x, int r)
 {
        int i;
@@ -153,8 +153,9 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
                w *= bucket->sum_weights[i];
                w = w >> 16;
                /*dprintk(" scaled %llx\n", w);*/
-               if (w < bucket->item_weights[i])
+               if (w < bucket->item_weights[i]) {
                        return bucket->h.items[i];
+               }
        }
 
        dprintk("bad list sums for bucket %d\n", bucket->h.id);
@@ -190,7 +191,7 @@ static int terminal(int x)
        return x & 1;
 }
 
-static int bucket_tree_choose(struct crush_bucket_tree *bucket,
+static int bucket_tree_choose(const struct crush_bucket_tree *bucket,
                              int x, int r)
 {
        int n;
@@ -222,7 +223,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket,
 
 /* straw */
 
-static int bucket_straw_choose(struct crush_bucket_straw *bucket,
+static int bucket_straw_choose(const struct crush_bucket_straw *bucket,
                               int x, int r)
 {
        __u32 i;
@@ -255,7 +256,7 @@ static __u64 crush_ln(unsigned int xin)
        iexpon = 15;
 
        // figure out number of bits we need to shift and
-       // do it in one step instead of iteratively     
+       // do it in one step instead of iteratively
        if (!(x & 0x18000)) {
          int bits = __builtin_clz(x & 0x1FFFF) - 16;
          x <<= bits;
@@ -297,7 +298,7 @@ static __u64 crush_ln(unsigned int xin)
  *
  */
 
-static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
+static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
                                int x, int r)
 {
        unsigned int i, high = 0;
@@ -340,37 +341,42 @@ static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
                        high_draw = draw;
                }
        }
+
        return bucket->h.items[high];
 }
 
 
-static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
+static int crush_bucket_choose(const struct crush_bucket *in,
+                              struct crush_work_bucket *work,
+                              int x, int r)
 {
        dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
        BUG_ON(in->size == 0);
        switch (in->alg) {
        case CRUSH_BUCKET_UNIFORM:
-               return bucket_uniform_choose((struct crush_bucket_uniform *)in,
-                                         x, r);
+               return bucket_uniform_choose(
+                       (const struct crush_bucket_uniform *)in,
+                       work, x, r);
        case CRUSH_BUCKET_LIST:
-               return bucket_list_choose((struct crush_bucket_list *)in,
+               return bucket_list_choose((const struct crush_bucket_list *)in,
                                          x, r);
        case CRUSH_BUCKET_TREE:
-               return bucket_tree_choose((struct crush_bucket_tree *)in,
+               return bucket_tree_choose((const struct crush_bucket_tree *)in,
                                          x, r);
        case CRUSH_BUCKET_STRAW:
-               return bucket_straw_choose((struct crush_bucket_straw *)in,
-                                          x, r);
+               return bucket_straw_choose(
+                       (const struct crush_bucket_straw *)in,
+                       x, r);
        case CRUSH_BUCKET_STRAW2:
-               return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
-                                           x, r);
+               return bucket_straw2_choose(
+                       (const struct crush_bucket_straw2 *)in,
+                       x, r);
        default:
                dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
                return in->items[0];
        }
 }
 
-
 /*
  * true if device is marked "out" (failed, fully offloaded)
  * of the cluster
@@ -412,7 +418,8 @@ static int is_out(const struct crush_map *map,
  * @parent_r: r value passed from the parent
  */
 static int crush_choose_firstn(const struct crush_map *map,
-                              struct crush_bucket *bucket,
+                              struct crush_work *work,
+                              const struct crush_bucket *bucket,
                               const __u32 *weight, int weight_max,
                               int x, int numrep, int type,
                               int *out, int outpos,
@@ -430,7 +437,7 @@ static int crush_choose_firstn(const struct crush_map *map,
        int rep;
        unsigned int ftotal, flocal;
        int retry_descent, retry_bucket, skip_rep;
-       struct crush_bucket *in = bucket;
+       const struct crush_bucket *in = bucket;
        int r;
        int i;
        int item = 0;
@@ -452,7 +459,7 @@ parent_r %d stable %d\n",
                skip_rep = 0;
                do {
                        retry_descent = 0;
-                       in = bucket;               /* initial bucket */
+                       in = bucket;              /* initial bucket */
 
                        /* choose through intervening buckets */
                        flocal = 0;
@@ -471,9 +478,13 @@ parent_r %d stable %d\n",
                                if (local_fallback_retries > 0 &&
                                    flocal >= (in->size>>1) &&
                                    flocal > local_fallback_retries)
-                                       item = bucket_perm_choose(in, x, r);
+                                       item = bucket_perm_choose(
+                                               in, work->work[-1-in->id],
+                                               x, r);
                                else
-                                       item = crush_bucket_choose(in, x, r);
+                                       item = crush_bucket_choose(
+                                               in, work->work[-1-in->id],
+                                               x, r);
                                if (item >= map->max_devices) {
                                        dprintk("   bad item %d\n", item);
                                        skip_rep = 1;
@@ -516,25 +527,27 @@ parent_r %d stable %d\n",
                                                        sub_r = r >> (vary_r-1);
                                                else
                                                        sub_r = 0;
-                                               if (crush_choose_firstn(map,
-                                                        map->buckets[-1-item],
-                                                        weight, weight_max,
-                                                        x, stable ? 1 : outpos+1, 0,
-                                                        out2, outpos, count,
-                                                        recurse_tries, 0,
-                                                        local_retries,
-                                                        local_fallback_retries,
-                                                        0,
-                                                        vary_r,
-                                                        stable,
-                                                        NULL,
-                                                        sub_r) <= outpos)
+                                               if (crush_choose_firstn(
+                                                           map,
+                                                           work,
+                                                           map->buckets[-1-item],
+                                                           weight, weight_max,
+                                                           x, stable ? 1 : outpos+1, 0,
+                                                           out2, outpos, count,
+                                                           recurse_tries, 0,
+                                                           local_retries,
+                                                           local_fallback_retries,
+                                                           0,
+                                                           vary_r,
+                                                           stable,
+                                                           NULL,
+                                                           sub_r) <= outpos)
                                                        /* didn't get leaf */
                                                        reject = 1;
                                        } else {
                                                /* we already have a leaf! */
                                                out2[outpos] = item;
-                                       }
+               }
                                }
 
                                if (!reject) {
@@ -598,7 +611,8 @@ reject:
  *
  */
 static void crush_choose_indep(const struct crush_map *map,
-                              struct crush_bucket *bucket,
+                              struct crush_work *work,
+                              const struct crush_bucket *bucket,
                               const __u32 *weight, int weight_max,
                               int x, int left, int numrep, int type,
                               int *out, int outpos,
@@ -608,7 +622,7 @@ static void crush_choose_indep(const struct crush_map *map,
                               int *out2,
                               int parent_r)
 {
-       struct crush_bucket *in = bucket;
+       const struct crush_bucket *in = bucket;
        int endpos = outpos + left;
        int rep;
        unsigned int ftotal;
@@ -676,7 +690,9 @@ static void crush_choose_indep(const struct crush_map *map,
                                        break;
                                }
 
-                               item = crush_bucket_choose(in, x, r);
+                               item = crush_bucket_choose(
+                                       in, work->work[-1-in->id],
+                                       x, r);
                                if (item >= map->max_devices) {
                                        dprintk("   bad item %d\n", item);
                                        out[rep] = CRUSH_ITEM_NONE;
@@ -722,13 +738,15 @@ static void crush_choose_indep(const struct crush_map *map,
 
                                if (recurse_to_leaf) {
                                        if (item < 0) {
-                                               crush_choose_indep(map,
-                                                  map->buckets[-1-item],
-                                                  weight, weight_max,
-                                                  x, 1, numrep, 0,
-                                                  out2, rep,
-                                                  recurse_tries, 0,
-                                                  0, NULL, r);
+                                               crush_choose_indep(
+                                                       map,
+                                                       work,
+                                                       map->buckets[-1-item],
+                                                       weight, weight_max,
+                                                       x, 1, numrep, 0,
+                                                       out2, rep,
+                                                       recurse_tries, 0,
+                                                       0, NULL, r);
                                                if (out2[rep] == CRUSH_ITEM_NONE) {
                                                        /* placed nothing; no leaf */
                                                        break;
@@ -779,6 +797,50 @@ static void crush_choose_indep(const struct crush_map *map,
 #endif
 }
 
+
+/* This takes a chunk of memory and sets it up to be a shiny new
+   working area for a CRUSH placement computation. It must be called
+   on any newly allocated memory before passing it in to
+   crush_do_rule. It may be used repeatedly after that, so long as the
+   map has not changed. If the map /has/ changed, you must make sure
+   the allocated memory is no smaller than the new working size and
+   re-run crush_init_workspace.
+
+   If you do retain the working space between calls to crush, make it
+   thread-local. If you reinstitute the locking I've spent so much
+   time getting rid of, I will be very unhappy with you. */
+
+void crush_init_workspace(const struct crush_map *m, void *v) {
+       /* We work by moving through the available space and setting
+          values and pointers as we go.
+
+          It's a bit like Forth's use of the 'allot' word since we
+          set the pointer first and then reserve the space for it to
+          point to by advancing `point` past it. */
+       struct crush_work *w = (struct crush_work *)v;
+       char *point = (char *)v;
+       __s32 b;
+       point += sizeof(struct crush_work *);
+       w->work = (struct crush_work_bucket **)point;
+       point += m->max_buckets * sizeof(struct crush_work_bucket *);
+       for (b = 0; b < m->max_buckets; ++b) {
+               if (m->buckets[b] == 0)
+                       continue;
+
+               w->work[b] = (struct crush_work_bucket *) point;
+               switch (m->buckets[b]->alg) {
+               default:
+                       point += sizeof(struct crush_work_bucket);
+                       break;
+               }
+               w->work[b]->perm_x = 0;
+               w->work[b]->perm_n = 0;
+               w->work[b]->perm = (__u32 *)point;
+               point += m->buckets[b]->size * sizeof(__u32);
+       }
+       BUG_ON((char *)point - (char *)w != m->working_size);
+}
+
 /**
  * crush_do_rule - calculate a mapping with the given input and rule
  * @map: the crush_map
@@ -788,14 +850,16 @@ static void crush_choose_indep(const struct crush_map *map,
  * @result_max: maximum result size
  * @weight: weight vector (for map leaves)
  * @weight_max: size of weight vector
+ * @cwin: Pointer to at least map->working_size bytes of memory or NULL.
  * @scratch: scratch vector for private use; must be >= 3 * result_max
  */
 int crush_do_rule(const struct crush_map *map,
                  int ruleno, int x, int *result, int result_max,
                  const __u32 *weight, int weight_max,
-                 int *scratch)
+                 void *cwin, int *scratch)
 {
        int result_len;
+       struct crush_work *cw = cwin;
        int *a = scratch;
        int *b = scratch + result_max;
        int *c = scratch + result_max*2;
@@ -805,7 +869,7 @@ int crush_do_rule(const struct crush_map *map,
        int *o;
        int osize;
        int *tmp;
-       struct crush_rule *rule;
+       const struct crush_rule *rule;
        __u32 step;
        int i, j;
        int numrep;
@@ -836,9 +900,10 @@ int crush_do_rule(const struct crush_map *map,
        w = a;
        o = b;
 
+
        for (step = 0; step < rule->len; step++) {
                int firstn = 0;
-               struct crush_rule_step *curstep = &rule->steps[step];
+               const struct crush_rule_step *curstep = &rule->steps[step];
 
                switch (curstep->op) {
                case CRUSH_RULE_TAKE:
@@ -934,6 +999,7 @@ int crush_do_rule(const struct crush_map *map,
                                                recurse_tries = choose_tries;
                                        osize += crush_choose_firstn(
                                                map,
+                                               cw,
                                                map->buckets[bno],
                                                weight, weight_max,
                                                x, numrep,
@@ -954,6 +1020,7 @@ int crush_do_rule(const struct crush_map *map,
                                                    numrep : (result_max-osize));
                                        crush_choose_indep(
                                                map,
+                                               cw,
                                                map->buckets[bno],
                                                weight, weight_max,
                                                x, out_size, numrep,
@@ -995,5 +1062,6 @@ int crush_do_rule(const struct crush_map *map,
                        break;
                }
        }
+
        return result_len;
 }
index 5dfd5b1125d2b257a4a00d1e77661613ca2227ec..0b0f05e0cdc6ca06482b92d98dc2cadc7978f9c0 100644 (file)
@@ -15,6 +15,8 @@ extern int crush_do_rule(const struct crush_map *map,
                         int ruleno,
                         int x, int *result, int result_max,
                         const __u32 *weights, int weight_max,
-                        int *scratch);
+                        void *cwin, int *scratch);
+
+extern void crush_init_workspace(const struct crush_map *m, void *v);
 
 #endif
index c46fa87ab5409b8c693053e6fcc2f8d012faa67e..6d659ae19b1c5beaeacdbba9136efa3e1a4d5cfb 100644 (file)
@@ -68,6 +68,8 @@ CrushWrapper *build_indep_map(CephContext *cct, int num_rack, int num_host,
   assert(ret == 0);
   c->set_rule_name(ruleno, "data");
 
+  c->finalize();
+
   if (false) {
     Formatter *f = Formatter::create("json-pretty");
     f->open_object_section("crush_map");
@@ -291,6 +293,8 @@ TEST(CRUSH, straw_zero) {
                                       "firstn", pg_pool_t::TYPE_REPLICATED);
   EXPECT_EQ(1, ruleset1);
 
+  c->finalize();
+
   vector<unsigned> reweight(n, 0x10000);
   for (int i=0; i<10000; ++i) {
     vector<int> out0, out1;
@@ -382,6 +386,8 @@ TEST(CRUSH, straw_same) {
     jf.flush(cout);
   }
 
+  c->finalize();
+
   vector<int> sum0(n, 0), sum1(n, 0);
   vector<unsigned> reweight(n, 0x10000);
   int different = 0;
@@ -451,6 +457,8 @@ double calc_straw2_stddev(int *weights, int n, bool verbose)
   totalweight /= (double)0x10000;
   double avgweight = totalweight / n;
 
+  c->finalize();
+
   int total = 1000000;
   for (int i=0; i<total; ++i) {
     vector<int> out;
@@ -591,6 +599,8 @@ TEST(CRUSH, straw2_reweight) {
   totalweight /= (double)0x10000;
   double avgweight = totalweight / n;
 
+  c->finalize();
+
   int total = 1000000;
   for (int i=0; i<total; ++i) {
     vector<int> out0, out1;
index 2b794ce6c7a424c3acef887ffcd9d17e3ed35d52..382d789a360e2eea143c42c205e0595662222d58 100644 (file)
@@ -905,6 +905,8 @@ TEST_F(IsaErasureCodeTest, create_ruleset)
     }
   }
 
+  c->finalize();
+
   {
     stringstream ss;
     ErasureCodeIsaDefault isa(tcache);
index c8f0e37da87237cf46b92ad56b8feb5481574ed3..01c27d95439cd2c3c257dc83d2ce3622f5bdd906 100644 (file)
@@ -307,6 +307,8 @@ TEST(ErasureCodeTest, create_ruleset)
     }
   }
 
+  c->finalize();
+
   {
     stringstream ss;
     ErasureCodeJerasureReedSolomonVandermonde jerasure;
index 758fd8e9e192ff37d6bbbdc166db74782693c4d5..5d03467805cf778dc2f548bd6cdc108ff27327ed 100644 (file)
@@ -130,6 +130,8 @@ TEST(ErasureCodeTest, create_ruleset)
     }
   }
 
+  c->finalize();
+
   ErasureCodeLrc lrc(g_conf->erasure_code_dir);
   EXPECT_EQ(0, lrc.create_ruleset("rule1", *c, &cerr));