From 5137cc656548d942d0f21ba3ef28a5b7d4d21831 Mon Sep 17 00:00:00 2001 From: Rongze Zhu Date: Fri, 10 Oct 2014 19:18:00 +0800 Subject: [PATCH] crush: fix incorrect use of adjust_item_weight method adjust_item_weight method will adjust all buckets which the item inside. If the osd.0 in host=fake01 and host=fake02, we execute "ceph osd crush osd.0 10 host=fake01", it not only will adjust fake01's weight, but also will adjust fake02's weight. the patch add adjust_item_weightf_in_loc method and fix remove_item, _remove_item_under, update_item, insert_item, detach_bucket methods. Signed-off-by: Rongze Zhu (cherry picked from commit 9850227d2f0ca2f692a154de2c14a0a08e751f08) Conflicts: src/crush/CrushWrapper.cc --- src/crush/CrushWrapper.cc | 60 ++++++++-- src/crush/CrushWrapper.h | 11 +- src/test/cli/crushtool/adjust-item-weight.t | 17 +++ .../cli/crushtool/simple.template.adj.one | 56 +++++++++ .../cli/crushtool/simple.template.adj.three | 64 ++++++++++ .../cli/crushtool/simple.template.adj.two | 64 ++++++++++ src/test/crush/TestCrushWrapper.cc | 110 ++++++++++++++++++ 7 files changed, 372 insertions(+), 10 deletions(-) create mode 100644 src/test/cli/crushtool/adjust-item-weight.t create mode 100644 src/test/cli/crushtool/simple.template.adj.one create mode 100644 src/test/cli/crushtool/simple.template.adj.three create mode 100644 src/test/cli/crushtool/simple.template.adj.two diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index 754c9eff2f367..cbdf719014d57 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -150,10 +150,10 @@ int CrushWrapper::remove_item(CephContext *cct, int item, bool unlink_only) for (unsigned i=0; isize; ++i) { int id = b->items[i]; if (id == item) { - adjust_item_weight(cct, item, 0); ldout(cct, 5) << "remove_item removing item " << item << " from bucket " << b->id << dendl; crush_bucket_remove_item(b, item); + adjust_item_weight(cct, b->id, b->weight); ret = 0; } } @@ -197,9 +197,9 @@ int CrushWrapper::_remove_item_under(CephContext *cct, int item, int ancestor, b for (unsigned i=0; isize; ++i) { int id = b->items[i]; if (id == item) { - adjust_item_weight(cct, item, 0); ldout(cct, 5) << "_remove_item_under removing item " << item << " from bucket " << b->id << dendl; crush_bucket_remove_item(b, item); + adjust_item_weight(cct, b->id, b->weight); ret = 0; } else if (id < 0) { int r = remove_item_under(cct, item, id, unlink_only); @@ -459,6 +459,8 @@ int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string n int cur = item; + // create locations if locations don't exist and add child in location with 0 weight + // the more detail in the insert_item method declaration in CrushWrapper.h for (map::iterator p = type_map.begin(); p != type_map.end(); ++p) { // ignore device type if (p->first == 0) @@ -520,15 +522,15 @@ int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string n << " to bucket " << id << dendl; int r = crush_bucket_add_item(b, cur, 0); assert (!r); + break; + } - // now that we've added the (0-weighted) item and any parent buckets, adjust the weight. - adjust_item_weightf(cct, item, weight); - + // adjust the item's weight in location + if(adjust_item_weightf_in_loc(cct, item, weight, loc) > 0) { if (item >= crush->max_devices) { crush->max_devices = item + 1; ldout(cct, 5) << "insert_item max_devices now " << crush->max_devices << dendl; } - return 0; } @@ -620,7 +622,7 @@ int CrushWrapper::update_item(CephContext *cct, int item, float weight, string n if (old_iweight != iweight) { ldout(cct, 5) << "update_item " << item << " adjusting weight " << ((float)old_iweight/(float)0x10000) << " -> " << weight << dendl; - adjust_item_weight(cct, item, iweight); + adjust_item_weight_in_loc(cct, item, iweight, loc); ret = 1; } if (get_item_name(item) != name) { @@ -654,6 +656,24 @@ int CrushWrapper::get_item_weight(int id) return -ENOENT; } +int CrushWrapper::get_item_weight_in_loc(int id, const map &loc) +{ + for (map::const_iterator l = loc.begin(); l != loc.end(); l++) { + int bid = get_item_id(l->second); + if (!bucket_exists(bid)) + continue; + crush_bucket *b = get_bucket(bid); + if ( b == NULL) + continue; + for (unsigned int i = 0; i < b->size; i++) { + if (b->items[i] == id) { + return crush_get_bucket_item_weight(b, i); + } + } + } + return -ENOENT; +} + int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight) { ldout(cct, 5) << "adjust_item_weight " << id << " weight " << weight << dendl; @@ -676,6 +696,32 @@ int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight) return changed; } +int CrushWrapper::adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map& loc) +{ + ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " weight " << weight << " in " << loc << dendl; + int changed = 0; + + for (map::const_iterator l = loc.begin(); l != loc.end(); l++) { + int bid = get_item_id(l->second); + if (!bucket_exists(bid)) + continue; + crush_bucket *b = get_bucket(bid); + if ( b == NULL) + continue; + for (unsigned int i = 0; i < b->size; i++) { + if (b->items[i] == id) { + int diff = crush_bucket_adjust_item_weight(b, id, weight); + ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " diff " << diff << " in bucket " << bid << dendl; + adjust_item_weight(cct, bid, b->weight); + changed++; + } + } + } + if (!changed) + return -ENOENT; + return changed; +} + bool CrushWrapper::check_item_present(int id) { bool found = false; diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 08cbb2e4d86a1..525051ffece72 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -563,11 +563,19 @@ public: float get_item_weightf(int id) { return (float)get_item_weight(id) / (float)0x10000; } + int get_item_weight_in_loc(int id, const map &loc); + float get_item_weightf_in_loc(int id, const map &loc) { + return (float)get_item_weight_in_loc(id, loc) / (float)0x10000; + } int adjust_item_weight(CephContext *cct, int id, int weight); int adjust_item_weightf(CephContext *cct, int id, float weight) { return adjust_item_weight(cct, id, (int)(weight * (float)0x10000)); } + int adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map& loc); + int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight, const map& loc) { + return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000), loc); + } void reweight(CephContext *cct); /// check if item id is present in the map hierarchy @@ -762,9 +770,6 @@ private: crush_bucket *b = get_bucket(item); unsigned bucket_weight = b->weight; - // zero out the bucket weight - adjust_item_weight(cct, item, 0); - // get where the bucket is located pair bucket_location = get_immediate_parent(item); diff --git a/src/test/cli/crushtool/adjust-item-weight.t b/src/test/cli/crushtool/adjust-item-weight.t new file mode 100644 index 0000000000000..16d7135ca6cb7 --- /dev/null +++ b/src/test/cli/crushtool/adjust-item-weight.t @@ -0,0 +1,17 @@ + $ crushtool -i "$TESTDIR/simple.template" --add-item 0 1.0 device0 --loc host host0 --loc cluster cluster0 -o one > /dev/null + +# +# add device0 into host=fake, the weight of device0 in host=host0 is 1.0, the weight of device0 in host=fake is 2.0 +# + + $ crushtool -i one --add-item 0 2.0 device0 --loc host fake --loc cluster cluster0 -o two > /dev/null + $ crushtool -d two -o final + $ cmp final "$TESTDIR/simple.template.adj.two" + +# +# update the weight of device0 in host=host0, it will not affect the weight of device0 in host=fake +# + + $ crushtool -i two --update-item 0 3.0 device0 --loc host host0 --loc cluster cluster0 -o three > /dev/null + $ crushtool -d three -o final + $ cmp final "$TESTDIR/simple.template.adj.three" diff --git a/src/test/cli/crushtool/simple.template.adj.one b/src/test/cli/crushtool/simple.template.adj.one new file mode 100644 index 0000000000000..aa16bbdedc327 --- /dev/null +++ b/src/test/cli/crushtool/simple.template.adj.one @@ -0,0 +1,56 @@ +# begin crush map + +# devices +device 0 device0 + +# types +type 0 device +type 1 host +type 2 cluster + +# buckets +host host0 { + id -2 # do not change unnecessarily + # weight 1.000 + alg straw + hash 0 # rjenkins1 + item device0 weight 1.000 +} +cluster cluster0 { + id -1 # do not change unnecessarily + # weight 1.000 + alg straw + hash 0 # rjenkins1 + item host0 weight 1.000 +} + +# rules +rule data { + ruleset 0 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule metadata { + ruleset 1 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule rbd { + ruleset 2 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} + +# end crush map diff --git a/src/test/cli/crushtool/simple.template.adj.three b/src/test/cli/crushtool/simple.template.adj.three new file mode 100644 index 0000000000000..fca0fe162f437 --- /dev/null +++ b/src/test/cli/crushtool/simple.template.adj.three @@ -0,0 +1,64 @@ +# begin crush map + +# devices +device 0 device0 + +# types +type 0 device +type 1 host +type 2 cluster + +# buckets +host host0 { + id -2 # do not change unnecessarily + # weight 3.000 + alg straw + hash 0 # rjenkins1 + item device0 weight 3.000 +} +host fake { + id -3 # do not change unnecessarily + # weight 2.000 + alg straw + hash 0 # rjenkins1 + item device0 weight 2.000 +} +cluster cluster0 { + id -1 # do not change unnecessarily + # weight 5.000 + alg straw + hash 0 # rjenkins1 + item host0 weight 3.000 + item fake weight 2.000 +} + +# rules +rule data { + ruleset 0 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule metadata { + ruleset 1 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule rbd { + ruleset 2 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} + +# end crush map diff --git a/src/test/cli/crushtool/simple.template.adj.two b/src/test/cli/crushtool/simple.template.adj.two new file mode 100644 index 0000000000000..21c29a3afefc8 --- /dev/null +++ b/src/test/cli/crushtool/simple.template.adj.two @@ -0,0 +1,64 @@ +# begin crush map + +# devices +device 0 device0 + +# types +type 0 device +type 1 host +type 2 cluster + +# buckets +host host0 { + id -2 # do not change unnecessarily + # weight 1.000 + alg straw + hash 0 # rjenkins1 + item device0 weight 1.000 +} +host fake { + id -3 # do not change unnecessarily + # weight 2.000 + alg straw + hash 0 # rjenkins1 + item device0 weight 2.000 +} +cluster cluster0 { + id -1 # do not change unnecessarily + # weight 3.000 + alg straw + hash 0 # rjenkins1 + item host0 weight 1.000 + item fake weight 2.000 +} + +# rules +rule data { + ruleset 0 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule metadata { + ruleset 1 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule rbd { + ruleset 2 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} + +# end crush map diff --git a/src/test/crush/TestCrushWrapper.cc b/src/test/crush/TestCrushWrapper.cc index 34d6401a68989..5eefaf15ef955 100644 --- a/src/test/crush/TestCrushWrapper.cc +++ b/src/test/crush/TestCrushWrapper.cc @@ -290,6 +290,116 @@ TEST(CrushWrapper, update_item) { delete c; } +TEST(CrushWrapper, adjust_item_weight) { + CrushWrapper *c = new CrushWrapper; + + const int ROOT_TYPE = 2; + c->set_type_name(ROOT_TYPE, "root"); + const int HOST_TYPE = 1; + c->set_type_name(HOST_TYPE, "host"); + const int OSD_TYPE = 0; + c->set_type_name(OSD_TYPE, "osd"); + + int rootno; + c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + ROOT_TYPE, 0, NULL, NULL, &rootno); + c->set_item_name(rootno, "default"); + + const string HOST0("host0"); + int host0; + c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + HOST_TYPE, 0, NULL, NULL, &host0); + c->set_item_name(host0, HOST0); + + const string FAKE("fake"); + int hostfake; + c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + HOST_TYPE, 0, NULL, NULL, &hostfake); + c->set_item_name(hostfake, FAKE); + + int item = 0; + + // construct crush map + + { + map loc; + loc["host"] = "host0"; + float host_weight = 2.0; + int bucket_id = 0; + + item = 0; + EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0, + "osd." + stringify(item), loc)); + item = 1; + EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0, + "osd." + stringify(item), loc)); + + bucket_id = c->get_item_id("host0"); + EXPECT_EQ(true, c->bucket_exists(bucket_id)); + EXPECT_EQ(host_weight, c->get_bucket_weightf(bucket_id)); + + } + + { + map loc; + loc["host"] = "fake"; + float host_weight = 2.0; + int bucket_id = 0; + + item = 0; + EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0, + "osd." + stringify(item), loc)); + item = 1; + EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0, + "osd." + stringify(item), loc)); + + bucket_id = c->get_item_id("fake"); + EXPECT_EQ(true, c->bucket_exists(bucket_id)); + EXPECT_EQ(host_weight, c->get_bucket_weightf(bucket_id)); + } + + // + // When there is: + // + // default --> host0 --> osd.0 1.0 + // | | + // | +-> osd.1 1.0 + // | + // +-> fake --> osd.0 1.0 + // | + // +-> osd.1 1.0 + // + // Trying to adjust osd.0 weight to 2.0 in all buckets + // Trying to adjust osd.1 weight to 2.0 in host=fake + // + // So the crush map will be: + // + // default --> host0 --> osd.0 2.0 + // | | + // | +-> osd.1 1.0 + // | + // +-> fake --> osd.0 2.0 + // | + // +-> osd.1 2.0 + // + + float original_weight = 1.0; + float modified_weight = 2.0; + map loc_one, loc_two; + loc_one["host"] = "host0"; + loc_two["host"] = "fake"; + + item = 0; + EXPECT_EQ(2, c->adjust_item_weightf(g_ceph_context, item, modified_weight)); + EXPECT_EQ(modified_weight, c->get_item_weightf_in_loc(item, loc_one)); + EXPECT_EQ(modified_weight, c->get_item_weightf_in_loc(item, loc_two)); + + item = 1; + EXPECT_EQ(1, c->adjust_item_weightf_in_loc(g_ceph_context, item, modified_weight, loc_two)); + EXPECT_EQ(original_weight, c->get_item_weightf_in_loc(item, loc_one)); + EXPECT_EQ(modified_weight, c->get_item_weightf_in_loc(item, loc_two)); +} + TEST(CrushWrapper, insert_item) { CrushWrapper *c = new CrushWrapper; -- 2.39.5