From 884414c5788bac9a269f01b26cbc0c55850c34f6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 2 Dec 2014 16:33:11 -0800 Subject: [PATCH] crush: fix crush_calc_straw() scalers when there are duplicate weights The straw bucket was originally tested with uniform weights and with a few more complicated patterns, like a stair step (1,2,3,4,5,6,7,8,9). And it worked! However, it does not behave with a pattern like 1, 2, 2, 3, 3, 4, 4 Strangely, it does behave with 1, 1, 2, 2, 3, 3, 4, 4 and more usefully it does behave with 1, 2, 2.001, 3, 3.001, 4, 4.001 That is, the logic that explicitly copes with weights that are duplicates is broken. The fix is to simply remove the special handling for duplicate weights -- it isn't necessary and doesn't work correctly anyway. Add a test that compares the mapping result of [1, 2, 2, 3, 3, ...] with [1, 2, 2.001, 3, 3.001, ...] and verifies that the difference is small. With the fix, we get .00012, whereas the original implementation gets .015. Note that this changes the straw bucket scalar *precalculated* values that are encoded with the map, and only when the admin opts into the new behavior. Backport: giant, firefly Signed-off-by: Sage Weil (cherry picked from commit 43d5c7caa7ce478477bde1bbd4f0649b5159cdcf) --- src/crush/CrushWrapper.h | 2 + src/crush/builder.c | 12 +--- src/test/crush/TestCrushWrapper.cc | 108 +++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 11 deletions(-) diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 1839681f153dc..efe9dbb65ba07 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -88,6 +88,8 @@ public: crush_destroy(crush); } + crush_map *get_crush_map() { return crush; } + /* building */ void create() { if (crush) diff --git a/src/crush/builder.c b/src/crush/builder.c index 93bd6c417b126..0d84aea7d9e9b 100644 --- a/src/crush/builder.c +++ b/src/crush/builder.c @@ -502,20 +502,10 @@ int crush_calc_straw(struct crush_map *map, struct crush_bucket_straw *bucket) if (i == size) break; - /* same weight as previous? */ - if (weights[reverse[i]] == weights[reverse[i-1]]) { - dprintk("same as previous\n"); - continue; - } - /* adjust straw for next guy */ wbelow += ((double)weights[reverse[i-1]] - lastw) * numleft; - for (j=i; jset_type_name(ROOT_TYPE, "root"); + const int OSD_TYPE = 0; + c->set_type_name(OSD_TYPE, "osd"); + + int n = 10; + int items[n], weights[n]; + for (int i=0; i set_max_devices(n); + + string root_name0("root0"); + int root0; + EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + ROOT_TYPE, n, items, weights, &root0)); + EXPECT_EQ(0, c->set_item_name(root0, root_name0)); + + string name0("rule0"); + int ruleset0 = c->add_simple_ruleset(name0, root_name0, "osd", + "firstn", pg_pool_t::TYPE_REPLICATED); + EXPECT_EQ(0, ruleset0); + + for (int i=0; i add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + ROOT_TYPE, n, items, weights, &root1)); + EXPECT_EQ(0, c->set_item_name(root1, root_name1)); + + string name1("rule1"); + int ruleset1 = c->add_simple_ruleset(name1, root_name1, "osd", + "firstn", pg_pool_t::TYPE_REPLICATED); + EXPECT_EQ(1, ruleset1); + + if (0) { + crush_bucket_straw *sb0 = reinterpret_cast(c->get_crush_map()->buckets[-1-root0]); + crush_bucket_straw *sb1 = reinterpret_cast(c->get_crush_map()->buckets[-1-root1]); + + for (int i=0; iitem_weights[i] + << "\t" << sb1->item_weights[i] + << "\t" + << "\t" << sb0->straws[i] + << "\t" << sb1->straws[i] + << std::endl; + } + } + + if (0) { + JSONFormatter jf(true); + jf.open_object_section("crush"); + c->dump(&jf); + jf.close_section(); + jf.flush(cout); + } + + vector sum0(n, 0), sum1(n, 0); + vector reweight(n, 0x10000); + int different = 0; + int max = 100000; + for (int i=0; i out0, out1; + c->do_rule(ruleset0, i, out0, 1, reweight); + ASSERT_EQ(1u, out0.size()); + c->do_rule(ruleset1, i, out1, 1, reweight); + ASSERT_EQ(1u, out1.size()); + sum0[out0[0]]++; + sum1[out1[0]]++; + if (out0[0] != out1[0]) + different++; + } + for (int i=0; i