From: Sage Weil Date: Sun, 14 Oct 2018 19:57:59 +0000 (-0500) Subject: crushtool: add --set-subtree-class; do not set class via --reclassify-root X-Git-Tag: v14.1.0~788^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7a80f993f4d79861cf72d7948a4bb48b0dd63eec;p=ceph.git crushtool: add --set-subtree-class; do not set class via --reclassify-root Sometimes we don't want the --reclassify-root to set the class of every device because a small number of them are (correctly) a different class. Allow both behaviors by adding a new, separate command to set the class of all devices beneath a point in the hierarchy and do not implicitly do that relabeling as part of --reclassify-root. Signed-off-by: Sage Weil --- diff --git a/doc/rados/operations/crush-map-edits.rst b/doc/rados/operations/crush-map-edits.rst index 4f1a853f7f20..e77b314082ab 100644 --- a/doc/rados/operations/crush-map-edits.rst +++ b/doc/rados/operations/crush-map-edits.rst @@ -524,12 +524,12 @@ There are three types of transformations possible: #. ``--reclassify-root `` - This will take everything in the hierarchy beneath root-name, mark - all devices with the specified class, and adjust any rules that - reference that root via a ``take `` to instead ``take - class ``. It renumbers the buckets in - such a way that the old IDs are instead used for the specified - class's "shadow tree" so that no data movement takes place. + This will take everything in the hierarchy beneath root-name and + adjust any rules that reference that root via a ``take + `` to instead ``take class ``. + It renumbers the buckets in such a way that the old IDs are instead + used for the specified class's "shadow tree" so that no data + movement takes place. For example, imagine you have an existing rule like:: @@ -556,6 +556,22 @@ There are three types of transformations possible: step emit } +#. ``--set-subtree-class `` + + This will mark every device in the subtree rooted at *bucket-name* + with the specified device class. + + This is normally used in conjunction with the ``--reclassify-root`` + option to ensure that all devices in that root are labeled with the + correct class. In some situations, however, some of those devices + (correctly) have a different class and we do not want to relabel + them. In such cases, one can exclude the ``--set-subtree-class`` + option. This means that the remapping process will not be perfect, + since the previous rule distributed across devices of multiple + classes but the adjusted rules will only map to devices of the + specified *device-class*, but that often is an accepted level of + data movement when the nubmer of outlier devices is small. + #. ``--reclassify-bucket `` This will allow you to merge a parallel type-specific hiearchy with the normal hierarchy. For example, many users have maps like:: @@ -624,6 +640,7 @@ The final command to convert the map comprised of the above fragments would be s $ ceph osd getcrushmap -o original $ crushtool -i original --reclassify \ + --set-subtree-class default hdd \ --reclassify-root default hdd \ --reclassify-bucket %-ssd ssd default \ --reclassify-bucket ssd ssd default \ diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index 2b735d93f8cf..c428fade86b6 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -1645,6 +1645,44 @@ int32_t CrushWrapper::_alloc_class_id() const { ceph_abort_msg("no available class id"); } +int CrushWrapper::set_subtree_class( + const string& subtree, + const string& new_class) +{ + if (!name_exists(subtree)) { + return -ENOENT; + } + + int new_class_id = -1; + if (class_exists(new_class)) { + new_class_id = get_class_id(new_class); + } else { + for (new_class_id = 1; class_name.count(new_class_id); ++new_class_id) ; + class_name[new_class_id] = new_class; + class_rname[new_class] = new_class_id; + } + + int id = get_item_id(subtree); + list q = { id }; + while (!q.empty()) { + int id = q.front(); + q.pop_front(); + crush_bucket *b = get_bucket(id); + if (IS_ERR(b)) { + return PTR_ERR(b); + } + for (unsigned i = 0; i < b->size; ++i) { + int item = b->items[i]; + if (item >= 0) { + class_map[item] = new_class_id; + } else { + q.push_back(item); + } + } + } + return 0; +} + int CrushWrapper::reclassify( CephContext *cct, ostream& out, @@ -1742,8 +1780,8 @@ int CrushWrapper::reclassify( if (bucket->items[j] < 0) { q.push_front(bucket->items[j]); } else { - // reclassify device - class_map[bucket->items[j]] = new_class_id; + // we don't reclassify the device here; if the users wants that, + // they can pass --set-subtree-class separately. } } } diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 08c5759e85b4..a0edf3e9af59 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -1310,6 +1310,8 @@ public: const map>& classify_bucket ); + int set_subtree_class(const string& name, const string& class_name); + void start_choose_profile() { free(crush->choose_tries); /* diff --git a/src/test/cli/crushtool/crush-classes/f b/src/test/cli/crushtool/crush-classes/f new file mode 100644 index 000000000000..c91e42666ffc Binary files /dev/null and b/src/test/cli/crushtool/crush-classes/f differ diff --git a/src/test/cli/crushtool/help.t b/src/test/cli/crushtool/help.t index f343c0e4f3a2..4b2e056c89ba 100644 --- a/src/test/cli/crushtool/help.t +++ b/src/test/cli/crushtool/help.t @@ -114,6 +114,8 @@ by adding classes --reclassify-bucket --reclassify-root + --set-subtree-class + set class for all items beneath bucket-name --compare compare two maps using --test parameters Options for the output stage diff --git a/src/test/cli/crushtool/reclassify.t b/src/test/cli/crushtool/reclassify.t index 720c8b38c0bb..93018014c668 100644 --- a/src/test/cli/crushtool/reclassify.t +++ b/src/test/cli/crushtool/reclassify.t @@ -1,6 +1,6 @@ - $ crushtool -i $TESTDIR/crush-classes/a --reclassify --reclassify-bucket %-ssd ssd default --reclassify-bucket ssd ssd default --reclassify-root default hdd -o foo + $ crushtool -i $TESTDIR/crush-classes/a --set-subtree-class default hdd --reclassify --reclassify-bucket %-ssd ssd default --reclassify-bucket ssd ssd default --reclassify-root default hdd -o foo classify_root default (-1) as hdd - created new class hdd as 1 + new class hdd exists as 1 renumbering bucket -1 -> -5 renumbering bucket -4 -> -6 renumbering bucket -3 -> -7 @@ -26,9 +26,9 @@ rule 1 had 0/10240 mismatched mappings (0) maps appear equivalent - $ crushtool -i $TESTDIR/crush-classes/d --reclassify --reclassify-bucket %-ssd ssd default --reclassify-bucket ssd ssd default --reclassify-root default hdd -o foo + $ crushtool -i $TESTDIR/crush-classes/d --set-subtree-class default hdd --reclassify --reclassify-bucket %-ssd ssd default --reclassify-bucket ssd ssd default --reclassify-root default hdd -o foo classify_root default (-1) as hdd - created new class hdd as 1 + new class hdd exists as 1 renumbering bucket -1 -> -13 renumbering bucket -6 -> -14 renumbering bucket -5 -> -15 @@ -168,7 +168,7 @@ because the new map has a strictly summing hierarchy. warning: maps are NOT equivalent [1] - $ crushtool -i $TESTDIR/crush-classes/beesly --reclassify --reclassify-root 0513-R-0050 hdd --reclassify-root 0513-R-0060 hdd -o foo + $ crushtool -i $TESTDIR/crush-classes/beesly --set-subtree-class 0513-R-0060 hdd --set-subtree-class 0513-R-0050 hdd --reclassify --reclassify-root 0513-R-0050 hdd --reclassify-root 0513-R-0060 hdd -o foo classify_root 0513-R-0050 (-2) as hdd new class hdd exists as 0 renumbering bucket -2 -> -131 @@ -371,7 +371,45 @@ this makes changes, but it doesn't really clean up the map, which is a mess! $ crushtool -i $TESTDIR/crush-classes/b --compare foo - rule 0 had 3060/3072 mismatched mappings (0.996094) + rule 0 had 3068/3072 mismatched mappings (0.998698) rule 1 had 4096/4096 mismatched mappings (1) warning: maps are NOT equivalent [1] + + $ crushtool -i $TESTDIR/crush-classes/f --reclassify --reclassify-root default hdd -o foo + classify_root default (-1) as hdd + new class hdd exists as 0 + renumbering bucket -1 -> -178 + renumbering bucket -4 -> -179 + renumbering bucket -25 -> -180 + renumbering bucket -16 -> -181 + renumbering bucket -21 -> -182 + renumbering bucket -19 -> -183 + renumbering bucket -15 -> -184 + renumbering bucket -7 -> -185 + renumbering bucket -47 -> -186 + renumbering bucket -18 -> -187 + renumbering bucket -8 -> -188 + renumbering bucket -6 -> -189 + renumbering bucket -12 -> -190 + renumbering bucket -23 -> -191 + renumbering bucket -22 -> -192 + renumbering bucket -20 -> -193 + renumbering bucket -11 -> -194 + renumbering bucket -10 -> -195 + renumbering bucket -17 -> -196 + renumbering bucket -13 -> -197 + renumbering bucket -9 -> -198 + renumbering bucket -3 -> -199 + renumbering bucket -14 -> -200 + renumbering bucket -5 -> -201 + renumbering bucket -2 -> -202 + +We expect some mismatches below because there are some ssd-labeled nodes under +default that we aren't changing the class on. + + $ crushtool -i $TESTDIR/crush-classes/f --compare foo + rule 0 had 627/10240 mismatched mappings (0.0612305) + rule 1 had 422/6144 mismatched mappings (0.0686849) + warning: maps are NOT equivalent + [1] diff --git a/src/tools/crushtool.cc b/src/tools/crushtool.cc index 8bc1c3fd5e32..d6a01864ec69 100644 --- a/src/tools/crushtool.cc +++ b/src/tools/crushtool.cc @@ -222,6 +222,8 @@ void usage() cout << " by adding classes\n"; cout << " --reclassify-bucket \n"; cout << " --reclassify-root \n"; + cout << " --set-subtree-class \n"; + cout << " set class for all items beneath bucket-name\n"; cout << " --compare compare two maps using --test parameters\n"; cout << "\n"; cout << "Options for the output stage\n"; @@ -416,6 +418,7 @@ int main(int argc, const char **argv) bool reclassify = false; map> reclassify_bucket; // %suffix or prefix% -> class, default_root map reclassify_root; // bucket -> class + map set_subtree_class; // bucket -> class string compare; @@ -479,6 +482,14 @@ int main(int argc, const char **argv) } reclassify_root[val] = *i; i = args.erase(i); + } else if (ceph_argparse_witharg(args, i, &val, "--set-subtree-class", + (char*)NULL)) { + if (i == args.end()) { + cerr << "expecting additional argument" << std::endl; + return EXIT_FAILURE; + } + set_subtree_class[val] = *i; + i = args.erase(i); } else if (ceph_argparse_flag(args, i, "--tree", (char*)NULL)) { tree = true; } else if (ceph_argparse_witharg(args, i, &val, "-f", "--format", (char*)NULL)) { @@ -1154,6 +1165,10 @@ int main(int argc, const char **argv) modified = true; } + for (auto& i : set_subtree_class) { + crush.set_subtree_class(i.first, i.second); + modified = true; + } if (reclassify) { int r = crush.reclassify( g_ceph_context,