From 0ed55e6150e3d82b2955cf3e9fa0f01b36a6474a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 7 Aug 2017 17:56:06 -0400 Subject: [PATCH] crush/CrushWrapper: fill in weight-sets when we build shadow trees When we build the shadow buckets for the class hierarchies, we need to fill in the weight-sets for each shadow bucket too. Skip the ids vector for now since it's not yet used by anything. Fixes: http://tracker.ceph.com/issues/20939 Signed-off-by: Sage Weil --- src/crush/CrushWrapper.cc | 62 +++++++++++++++++++++++++++++++--- src/crush/CrushWrapper.h | 3 +- src/test/crush/CrushWrapper.cc | 15 ++++---- 3 files changed, 68 insertions(+), 12 deletions(-) diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index 06dfceb4ae248..262db5876eb27 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -1407,6 +1407,10 @@ int CrushWrapper::populate_classes( used_ids.insert(q.second); } } + // accumulate weight values for each carg and bucket as we go. because it is + // depth first, we will have the nested bucket weights we need when we + // finish constructing the containing buckets. + map>> cmap_item_weight; // cargs -> bno -> weights set roots; find_nonshadow_roots(roots); for (auto &r : roots) { @@ -1415,7 +1419,7 @@ int CrushWrapper::populate_classes( for (auto &c : class_name) { int clone; int res = device_class_clone(r, c.first, old_class_bucket, used_ids, - &clone); + &clone, &cmap_item_weight); if (res < 0) return res; } @@ -1859,7 +1863,8 @@ int CrushWrapper::device_class_clone( int original_id, int device_class, const std::map>& old_class_bucket, const std::set& used_ids, - int *clone) + int *clone, + map>> *cmap_item_weight) { const char *item_name = get_item_name(original_id); if (item_name == NULL) @@ -1872,6 +1877,7 @@ int CrushWrapper::device_class_clone( *clone = get_item_id(copy_name); return 0; } + crush_bucket *original = get_bucket(original_id); assert(!IS_ERR(original)); crush_bucket *copy = crush_make_bucket(crush, @@ -1880,28 +1886,37 @@ int CrushWrapper::device_class_clone( original->type, 0, NULL, NULL); assert(copy); + + vector item_orig_pos; // new item pos -> orig item pos for (unsigned i = 0; i < original->size; i++) { int item = original->items[i]; int weight = crush_get_bucket_item_weight(original, i); if (item >= 0) { if (class_map.count(item) != 0 && class_map[item] == device_class) { - int res = bucket_add_item(copy, item, weight); + int res = crush_bucket_add_item(crush, copy, item, weight); if (res) return res; + } else { + continue; } } else { int child_copy_id; int res = device_class_clone(item, device_class, old_class_bucket, - used_ids, &child_copy_id); + used_ids, &child_copy_id, + cmap_item_weight); if (res < 0) return res; crush_bucket *child_copy = get_bucket(child_copy_id); assert(!IS_ERR(child_copy)); - res = bucket_add_item(copy, child_copy_id, child_copy->weight); + res = crush_bucket_add_item(crush, copy, child_copy_id, + child_copy->weight); if (res) return res; } + item_orig_pos.push_back(i); } + assert(item_orig_pos.size() == copy->size); + int bno = 0; if (old_class_bucket.count(original_id) && old_class_bucket.at(original_id).count(device_class)) { @@ -1919,14 +1934,51 @@ int CrushWrapper::device_class_clone( if (res) return res; assert(!bno || bno == *clone); + res = set_item_class(*clone, device_class); if (res < 0) return res; + // we do not use set_item_name because the name is intentionally invalid name_map[*clone] = copy_name; if (have_rmaps) name_rmap[copy_name] = *clone; class_bucket[original_id][device_class] = *clone; + + // set up choose_args for the new bucket. + for (auto& w : choose_args) { + crush_choose_arg_map& cmap = w.second; + if (-1-bno >= (int)cmap.size) { + unsigned new_size = -1-bno + 1; + cmap.args = (crush_choose_arg*)realloc(cmap.args, + new_size * sizeof(cmap.args[0])); + memset(cmap.args + cmap.size, 0, + (new_size - cmap.size) * sizeof(cmap.args[0])); + } + auto& o = cmap.args[-1-original_id]; + auto& n = cmap.args[-1-bno]; + n.ids_size = 0; // FIXME: implement me someday + n.weight_set_size = o.weight_set_size; + n.weight_set = (crush_weight_set*)calloc( + n.weight_set_size, sizeof(crush_weight_set)); + for (size_t s = 0; s < n.weight_set_size; ++s) { + n.weight_set[s].size = copy->size; + n.weight_set[s].weights = (__u32*)calloc(copy->size, sizeof(__u32)); + } + for (size_t s = 0; s < n.weight_set_size; ++s) { + vector bucket_weights(n.weight_set_size); + for (size_t i = 0; i < copy->size; ++i) { + int item = copy->items[i]; + if (item >= 0) { + n.weight_set[s].weights[i] = o.weight_set[s].weights[item_orig_pos[i]]; + } else { + n.weight_set[s].weights[i] = (*cmap_item_weight)[w.first][item][s]; + } + bucket_weights[s] += n.weight_set[s].weights[i]; + } + (*cmap_item_weight)[w.first][bno] = bucket_weights; + } + } return 0; } diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index a75495bcc283d..9ef12f9b66269 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -1212,7 +1212,8 @@ public: int original, int device_class, const std::map>& old_class_bucket, const std::set& used_ids, - int *clone); + int *clone, + map>> *cmap_item_weight); int rename_class(const string& srcname, const string& dstname); int populate_classes( const std::map>& old_class_bucket); diff --git a/src/test/crush/CrushWrapper.cc b/src/test/crush/CrushWrapper.cc index cf651eb2a9636..cbd9dd30a6b08 100644 --- a/src/test/crush/CrushWrapper.cc +++ b/src/test/crush/CrushWrapper.cc @@ -1111,10 +1111,11 @@ TEST(CrushWrapper, trim_roots_with_class) { int root_id = c.get_item_id("default"); int clone_id; map> old_class_bucket; + map>> cmap_item_weight; // cargs -> bno -> weights set used_ids; ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids, - &clone_id), 0); + &clone_id, &cmap_item_weight), 0); ASSERT_TRUE(c.name_exists("default")); ASSERT_TRUE(c.name_exists("default~ssd")); @@ -1145,11 +1146,12 @@ TEST(CrushWrapper, device_class_clone) { c.reweight(g_ceph_context); map> old_class_bucket; + map>> cmap_item_weight; // cargs -> bno -> weights set used_ids; int root_id = c.get_item_id("default"); int clone_id; ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids, - &clone_id), 0); + &clone_id, &cmap_item_weight), 0); ASSERT_TRUE(c.name_exists("default~ssd")); ASSERT_EQ(clone_id, c.get_item_id("default~ssd")); ASSERT_TRUE(c.subtree_contains(clone_id, item)); @@ -1160,13 +1162,13 @@ TEST(CrushWrapper, device_class_clone) { // cloning again does nothing and returns the existing one int other_clone_id; ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids, - &other_clone_id), 0); + &other_clone_id, &cmap_item_weight), 0); ASSERT_EQ(clone_id, other_clone_id); // invalid arguments ASSERT_EQ(c.device_class_clone(12345, cl, old_class_bucket, used_ids, - &other_clone_id), -ECHILD); + &other_clone_id, &cmap_item_weight), -ECHILD); ASSERT_EQ(c.device_class_clone(root_id, 12345, old_class_bucket, used_ids, - &other_clone_id), -EBADF); + &other_clone_id, &cmap_item_weight), -EBADF); } TEST(CrushWrapper, split_id_class) { @@ -1184,11 +1186,12 @@ TEST(CrushWrapper, split_id_class) { c.class_map[item] = class_id; map> old_class_bucket; + map>> cmap_item_weight; // cargs -> bno -> weights set used_ids; int item_id = c.get_item_id("default"); int clone_id; ASSERT_EQ(c.device_class_clone(item_id, class_id, old_class_bucket, used_ids, - &clone_id), 0); + &clone_id, &cmap_item_weight), 0); int retrieved_item_id; int retrieved_class_id; ASSERT_EQ(c.split_id_class(clone_id, &retrieved_item_id, &retrieved_class_id), 0); -- 2.39.5