From: Sage Weil
Date: Thu, 22 Jan 2015 18:15:59 +0000 (-0800)
Subject: crush: add allowed_bucket_algs tunable
X-Git-Tag: v0.93~161^2~4
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7023eabd5f144a7d3b55b64e008eaf2b71fa62f6;p=ceph.git

crush: add allowed_bucket_algs tunable

This tunable is a bitmask indicating which bucket algorithms are allowed. For now, the only purpose is to affect get_default_bucket_alg(), which will try to pick a type that is supported, with a preference for straw2 or straw buckets.

In the future, we likely want something a bit more sophisticated that reflects whether the bucket is expected to be fixed-size or not.

We could have added a default_bucket_type field to accomplish this, but I think that would prove to be too limiting in the future, and this accomplishes the same thing.

Note that if the admin selects the 'hammer' tunables, that means clients and servers will support straw2 and that will be the preferred choice. The default is still bobtail tunables, which means new clusters created on hammer will not use straw2 just yet.

Signed-off-by: Sage Weil --- diff --git a/src/crush/CrushCompiler.cc b/src/crush/CrushCompiler.cc index 0e266cd3f886..e16692da4797 100644 --- a/src/crush/CrushCompiler.cc +++ b/src/crush/CrushCompiler.cc @@ -193,6 +193,9 @@ int CrushCompiler::decompile(ostream &out) out << "tunable chooseleaf_vary_r " << crush.get_chooseleaf_vary_r() << "\n"; if (crush.get_straw_calc_version() != 0) out << "tunable straw_calc_version " << crush.get_straw_calc_version() << "\n"; + if (crush.get_allowed_bucket_algs() != CRUSH_LEGACY_ALLOWED_BUCKET_ALGS) + out << "tunable allowed_bucket_algs " << crush.get_allowed_bucket_algs() + << "\n"; out << "\n# devices\n"; for (int i=0; ichooseleaf_descend_once, bl); ::encode(crush->chooseleaf_vary_r, bl); ::encode(crush->straw_calc_version, bl); + ::encode(crush->allowed_bucket_algs, bl); } static void decode_32_or_64_string_map(map& m, bufferlist::iterator& blp) @@ -1216,6 +1217,9 @@ void CrushWrapper::decode(bufferlist::iterator& blp) if (!blp.end()) { ::decode(crush->straw_calc_version, blp); } + if (!blp.end()) { + ::decode(crush->allowed_bucket_algs, blp); + } finalize(); } catch (...) 
{ @@ -1413,6 +1417,7 @@ void CrushWrapper::dump_tunables(Formatter *f) const f->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once()); f->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r()); f->dump_int("straw_calc_version", get_straw_calc_version()); + f->dump_int("allowed_bucket_algs", get_allowed_bucket_algs()); // be helpful about it if (has_firefly_tunables()) diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 632e9e49181d..cfafab067dee 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -107,6 +107,7 @@ public: crush->choose_total_tries = 19; crush->chooseleaf_descend_once = 0; crush->chooseleaf_vary_r = 0; + crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; } void set_tunables_bobtail() { crush->choose_local_tries = 0; @@ -114,6 +115,7 @@ public: crush->choose_total_tries = 50; crush->chooseleaf_descend_once = 1; crush->chooseleaf_vary_r = 0; + crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; } void set_tunables_firefly() { crush->choose_local_tries = 0; @@ -121,6 +123,7 @@ public: crush->choose_total_tries = 50; crush->chooseleaf_descend_once = 1; crush->chooseleaf_vary_r = 1; + crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; } void set_tunables_hammer() { crush->choose_local_tries = 0; @@ -128,6 +131,11 @@ public: crush->choose_total_tries = 50; crush->chooseleaf_descend_once = 1; crush->chooseleaf_vary_r = 1; + crush->allowed_bucket_algs = + (1 << CRUSH_BUCKET_UNIFORM) | + (1 << CRUSH_BUCKET_LIST) | + (1 << CRUSH_BUCKET_STRAW) | + (1 << CRUSH_BUCKET_STRAW2); } void set_tunables_legacy() { @@ -185,6 +193,13 @@ public: crush->straw_calc_version = n; } + unsigned get_allowed_bucket_algs() const { + return crush->allowed_bucket_algs; + } + void set_allowed_bucket_algs(unsigned n) { + crush->allowed_bucket_algs = n; + } + bool has_argonaut_tunables() const { return crush->choose_local_tries == 2 && @@ -192,7 +207,8 @@ public: crush->choose_total_tries == 19 && 
crush->chooseleaf_descend_once == 0 && crush->chooseleaf_vary_r == 0 && - crush->straw_calc_version == 0; + crush->straw_calc_version == 0 && + crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; } bool has_bobtail_tunables() const { return @@ -201,7 +217,8 @@ public: crush->choose_total_tries == 50 && crush->chooseleaf_descend_once == 1 && crush->chooseleaf_vary_r == 0 && - crush->straw_calc_version == 0; + crush->straw_calc_version == 0 && + crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; } bool has_firefly_tunables() const { return @@ -210,7 +227,8 @@ public: crush->choose_total_tries == 50 && crush->chooseleaf_descend_once == 1 && crush->chooseleaf_vary_r == 1 && - crush->straw_calc_version == 0; + crush->straw_calc_version == 0 && + crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; } bool has_hammer_tunables() const { return @@ -219,8 +237,12 @@ public: crush->choose_total_tries == 50 && crush->chooseleaf_descend_once == 1 && crush->chooseleaf_vary_r == 1 && - crush->straw_calc_version == 1; - } + crush->straw_calc_version == 1 && + crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) | + (1 << CRUSH_BUCKET_LIST) | + (1 << CRUSH_BUCKET_STRAW) | + (1 << CRUSH_BUCKET_STRAW2)); +} bool has_optimal_tunables() const { return has_firefly_tunables(); @@ -253,7 +275,18 @@ public: // default bucket types unsigned get_default_bucket_alg() const { - return CRUSH_BUCKET_STRAW; + // in order of preference + if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW2)) + return CRUSH_BUCKET_STRAW2; + if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW)) + return CRUSH_BUCKET_STRAW; + if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_TREE)) + return CRUSH_BUCKET_TREE; + if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_LIST)) + return CRUSH_BUCKET_LIST; + if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_UNIFORM)) + return CRUSH_BUCKET_UNIFORM; + return 0; } // bucket types diff --git a/src/crush/builder.c 
b/src/crush/builder.c index 7e611906bd84..28d957db21f0 100644 --- a/src/crush/builder.c +++ b/src/crush/builder.c @@ -30,6 +30,10 @@ struct crush_map *crush_create() m->chooseleaf_descend_once = 0; m->chooseleaf_vary_r = 0; m->straw_calc_version = 0; + + // by default, use legacy types, and also exclude tree, + // since it was buggy. + m->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; return m; } diff --git a/src/crush/crush.h b/src/crush/crush.h index b2628bf24f46..5082c0319647 100644 --- a/src/crush/crush.h +++ b/src/crush/crush.h @@ -112,6 +112,15 @@ enum { }; extern const char *crush_bucket_alg_name(int alg); +/* + * although tree was a legacy algorithm, it has been buggy, so + * exclude it. + */ +#define CRUSH_LEGACY_ALLOWED_BUCKET_ALGS ( \ + (1 << CRUSH_BUCKET_UNIFORM) | \ + (1 << CRUSH_BUCKET_LIST) | \ + (1 << CRUSH_BUCKET_STRAW)) + struct crush_bucket { __s32 id; /* this'll be negative */ __u16 type; /* non-zero; type=0 is reserved for devices */ @@ -198,6 +207,15 @@ struct crush_map { */ __u8 straw_calc_version; + /* + * allowed bucket algs is a bitmask, here the bit positions + * are CRUSH_BUCKET_*. note that these are *bits* and + * CRUSH_BUCKET_* values are not, so we need to or together (1 + * << CRUSH_BUCKET_WHATEVER). The 0th bit is not used to + * minimize confusion (bucket type values start at 1). 
+ */ + __u32 allowed_bucket_algs; + __u32 *choose_tries; }; diff --git a/src/test/cli/osdmaptool/crush.t b/src/test/cli/osdmaptool/crush.t index 584da09f4c65..6be17436fa74 100644 --- a/src/test/cli/osdmaptool/crush.t +++ b/src/test/cli/osdmaptool/crush.t @@ -6,5 +6,5 @@ osdmaptool: exported crush map to oc $ osdmaptool --import-crush oc myosdmap osdmaptool: osdmap file 'myosdmap' - osdmaptool: imported 487 byte crush map from oc + osdmaptool: imported 491 byte crush map from oc osdmaptool: writing epoch 3 to myosdmap diff --git a/src/tools/crushtool.cc b/src/tools/crushtool.cc index cb3d628ca4dc..fa678c384522 100644 --- a/src/tools/crushtool.cc +++ b/src/tools/crushtool.cc @@ -198,6 +198,7 @@ int main(int argc, const char **argv) int chooseleaf_descend_once = -1; int chooseleaf_vary_r = -1; int straw_calc_version = -1; + int allowed_bucket_algs = -1; CrushWrapper crush; @@ -280,6 +281,9 @@ int main(int argc, const char **argv) } else if (ceph_argparse_withint(args, i, &straw_calc_version, &err, "--set_straw_calc_version", (char*)NULL)) { adjust = true; + } else if (ceph_argparse_withint(args, i, &allowed_bucket_algs, &err, + "--set_allowed_bucket_algs", (char*)NULL)) { + adjust = true; } else if (ceph_argparse_flag(args, i, "--reweight", (char*)NULL)) { reweight = true; } else if (ceph_argparse_withint(args, i, &add_item, &err, "--add_item", (char*)NULL)) { @@ -744,6 +748,10 @@ int main(int argc, const char **argv) crush.set_straw_calc_version(straw_calc_version); modified = true; } + if (allowed_bucket_algs >= 0) { + crush.set_allowed_bucket_algs(allowed_bucket_algs); + modified = true; + } if (modified) { crush.finalize();