From: Mykola Golub Date: Tue, 15 Jun 2021 05:46:39 +0000 (+0300) Subject: tools/crushdiff: new tool to test crushmap change X-Git-Tag: v17.1.0~979^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6c73184b215d295e44bbc2360f502da1c8c5b854;p=ceph.git tools/crushdiff: new tool to test crushmap change A tool to test the effect (number of pgs, objects, bytes moved) of a crushmap change. This is a wrapper around osdmaptool, relying heavily on its --test-map-pgs-dump option to get the list of changed pgs. Additionally it uses pg stats to calculate the numbers of objects and bytes moved. Signed-off-by: Mykola Golub --- diff --git a/ceph.spec.in b/ceph.spec.in index 159d99d982b5..5fe197417a2a 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -1577,6 +1577,7 @@ exit 0 %{_bindir}/cephfs-data-scan %{_bindir}/cephfs-journal-tool %{_bindir}/cephfs-table-tool +%{_bindir}/crushdiff %{_bindir}/rados %{_bindir}/radosgw-admin %{_bindir}/rbd diff --git a/debian/ceph-common.install b/debian/ceph-common.install index b1c5769fd53e..2ec671b357cf 100755 --- a/debian/ceph-common.install +++ b/debian/ceph-common.install @@ -15,6 +15,7 @@ usr/bin/ceph-syn usr/bin/cephfs-data-scan usr/bin/cephfs-journal-tool usr/bin/cephfs-table-tool +usr/bin/crushdiff usr/bin/rados usr/bin/radosgw-admin usr/bin/rbd diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt index fdfde4f34ef9..fa10db7f3adc 100644 --- a/src/tools/CMakeLists.txt +++ b/src/tools/CMakeLists.txt @@ -96,6 +96,8 @@ add_executable(osdmaptool ${osdomaptool_srcs}) target_link_libraries(osdmaptool global) install(TARGETS osdmaptool DESTINATION bin) +install(PROGRAMS crushdiff DESTINATION bin) + set(ceph-diff-sorted_srcs ceph-diff-sorted.cc) add_executable(ceph-diff-sorted ${ceph-diff-sorted_srcs}) set_target_properties(ceph-diff-sorted PROPERTIES diff --git a/src/tools/crushdiff b/src/tools/crushdiff new file mode 100755 index 000000000000..ca9173379731 --- /dev/null +++ b/src/tools/crushdiff @@ 
#!/usr/bin/python3
#
# A tool to test the effect (number of pgs, objects, bytes moved) of a
# crushmap change. This is a wrapper around osdmaptool, relying heavily
# on its --test-map-pgs-dump option to get the list of changed pgs.
# Additionally it uses pg stats to calculate the numbers of objects
# and bytes moved.
#
# Typical usage:
#
#   # Get current crushmap
#   $ crushdiff export cm.txt
#   # Edit the map
#   $ $EDITOR cm.txt
#   # Check the result
#   $ crushdiff compare cm.txt
#   # Install the updated map
#   $ crushdiff import cm.txt
#
# By default, crushdiff will use the cluster current osdmap and pg
# stats, which requires access to the cluster. But one can use the
# --osdmap and --pg-dump options to test against previously obtained
# data.
#

import argparse
import re
import json
import os
import shlex
import subprocess
import sys
import tempfile

#
# Global
#

parser = argparse.ArgumentParser(prog='crushdiff',
                                 description='Tool for updating crush map')
parser.add_argument(
    'command',
    metavar='compare|export|import',
    # Reject unknown commands up front instead of silently doing nothing.
    choices=['compare', 'export', 'import'],
    help='command',
    default=None,
)
parser.add_argument(
    '-c', '--compiled',
    action='store_true',
    help='use compiled crush map',
    default=False,
)
parser.add_argument(
    'crushmap',
    metavar='crushmap',
    help='crushmap file (text, or compiled with --compiled)',
    default=None,
)
parser.add_argument(
    '-m', '--osdmap',
    metavar='osdmap',
    help='osdmap file to use instead of the cluster current osdmap',
    default=None,
)
parser.add_argument(
    '-p', '--pg-dump',
    metavar='pg-dump',
    help='`ceph pg dump` json output',
    default=None,
)
parser.add_argument(
    '-v', '--verbose',
    action='store_true',
    help='be verbose',
    default=False,
)

# Line formats of `osdmaptool --test-map-pgs-dump` output; see
# parse_test_map_pgs_dump() for a sample.
_POOL_LINE_RE = re.compile(r'^pool (\d+) pg_num (\d+)')
_OSD_SUMMARY_RE = re.compile(r'^#osd')
# `[\d,]*` (not `+`) so that a pg mapped to no osds, "[]", is still parsed.
_PG_LINE_RE = re.compile(r'^(\d+\.[0-9a-f]+)\s+\[([\d,]*)\]')

#
# Functions
#

def get_human_readable(bytes, precision=2):
    """Format a byte count with a binary suffix ('', Ki, Mi, Gi, Ti).

    The value is divided by 1024 until it drops below 1024 or the
    largest suffix (Ti) is reached, then printed with `precision`
    digits after the decimal point.
    """
    suffixes = ['', 'Ki', 'Mi', 'Gi', 'Ti']
    suffix_index = 0
    # `>=` so that exactly 1024 is shown as 1.00Ki rather than 1024.00.
    while bytes >= 1024 and suffix_index < len(suffixes) - 1:
        suffix_index += 1
        bytes = bytes / 1024.0
    return '%.*f%s' % (precision, bytes, suffixes[suffix_index])

def run_cmd(cmd, verbose=False):
    """Run `cmd` through the shell.

    If `verbose` is set the command line is echoed to stderr first.

    Raises subprocess.CalledProcessError if the command exits with a
    non-zero status, so that later steps do not run on missing or
    partial output files.
    """
    if verbose:
        print(cmd, file=sys.stderr, flush=True)
    # shell=True is required: callers use redirections ('>', '>&2').
    subprocess.run(cmd, shell=True, check=True)

def get_osdmap(file):
    """Load an osdmap JSON dump from `file` and return the parsed object."""
    with open(file, "r") as f:
        return json.load(f)

def get_pools(osdmap):
    """Return the pools of a parsed osdmap as a dict keyed by pool id."""
    return {p['pool']: p for p in osdmap['pools']}

def get_erasure_code_profiles(osdmap):
    """Return the 'erasure_code_profiles' entry of a parsed osdmap."""
    return osdmap['erasure_code_profiles']

def get_pgmap(pg_dump_file):
    """Load a pg dump JSON file and return its 'pg_map' entry."""
    with open(pg_dump_file, "r") as f:
        return json.load(f)['pg_map']

def get_pg_stats(pgmap):
    """Return the 'pg_stats' entries of a pg map as a dict keyed by pgid."""
    return {pg['pgid']: pg for pg in pgmap['pg_stats']}

def parse_test_map_pgs_dump(file):
    """Parse `osdmaptool --test-map-pgs-dump` output.

    Returns a dict mapping pgid (str) to the list of osd ids (int) the
    pg is mapped to. A pg printed with an empty osd list maps to [].
    """
    # Format:
    # pool 1 pg_num 16
    # 1.0 [1,0,2] 1
    # 1.1 [2,0,1] 2
    # ...
    # pool 2 pg_num 32
    # 2.0 [2,1,0] 2
    # 2.1 [2,1,0] 2
    # ...
    # #osd count first primary c wt wt
    # osd.1 208 123 123 0.098587 1

    pgs = {}

    with open(file, "r") as f:
        in_pool = False
        for line in f:
            if _POOL_LINE_RE.match(line):
                in_pool = True
                continue
            if not in_pool:
                # Ignore everything before the first "pool ..." header.
                continue
            if _OSD_SUMMARY_RE.match(line):
                # The per-osd summary follows the pg list; nothing more
                # to parse.
                break
            m = _PG_LINE_RE.match(line)
            if not m:
                continue
            pgs[m.group(1)] = [int(x) for x in m.group(2).split(',') if x]

    return pgs

def _percent(part, total):
    """Return `part` as a percentage of `total`, or 0 if `total` is 0."""
    return 100 * part / total if total else 0

def do_compare(new_crushmap_in, osdmap=None, pg_dump=None, compiled=False,
               verbose=False):
    """Report how much data a new crushmap would move.

    new_crushmap_in -- crushmap file; compiled with `crushtool -c`
                       unless `compiled` is set
    osdmap          -- osdmap file; fetched with `ceph osd getmap` if
                       not given
    pg_dump         -- `ceph pg dump --format json` output file; fetched
                       from the cluster if not given
    verbose         -- echo executed commands, the raw pg mappings and
                       every changed pg to stderr

    Prints the number of affected pgs and objects, and the number of pg
    shards, object shards and bytes to move, to stdout.

    Raises subprocess.CalledProcessError if an external tool fails.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        if compiled:
            new_crushmap_file = new_crushmap_in
        else:
            new_crushmap_file = os.path.join(tmpdirname, 'crushmap')
            run_cmd('crushtool -c {} -o {}'.format(
                shlex.quote(new_crushmap_in),
                shlex.quote(new_crushmap_file)), verbose)

        # Always work on a copy in the temporary directory.
        # NOTE(review): presumably because `osdmaptool --import-crush`
        # below rewrites the map file it is given -- confirm.
        osdmap_file = os.path.join(tmpdirname, 'osdmap')
        if osdmap:
            run_cmd('cp {} {}'.format(shlex.quote(osdmap),
                                      shlex.quote(osdmap_file)), verbose)
        else:
            run_cmd('ceph osd getmap -o {}'.format(shlex.quote(osdmap_file)),
                    verbose)

        if not pg_dump:
            pg_dump = os.path.join(tmpdirname, 'pg_dump.json')
            run_cmd('ceph pg dump --format json > {}'.format(
                shlex.quote(pg_dump)), verbose)

        old_test_map_pgs_dump = os.path.join(tmpdirname, 'pgs.old.txt')
        run_cmd('osdmaptool {} --test-map-pgs-dump > {}'.format(
            shlex.quote(osdmap_file),
            shlex.quote(old_test_map_pgs_dump)), verbose)
        if verbose:
            run_cmd('cat {} >&2'.format(shlex.quote(old_test_map_pgs_dump)),
                    True)

        new_test_map_pgs_dump = os.path.join(tmpdirname, 'pgs.new.txt')
        run_cmd(
            'osdmaptool {} --import-crush {} --test-map-pgs-dump > {}'.format(
                shlex.quote(osdmap_file), shlex.quote(new_crushmap_file),
                shlex.quote(new_test_map_pgs_dump)), verbose)
        if verbose:
            run_cmd('cat {} >&2'.format(shlex.quote(new_test_map_pgs_dump)),
                    True)

        osdmap_file_json = os.path.join(tmpdirname, 'osdmap.json')
        run_cmd('osdmaptool {} --dump json > {}'.format(
            shlex.quote(osdmap_file), shlex.quote(osdmap_file_json)), verbose)
        osdmap_json = get_osdmap(osdmap_file_json)
        pools = get_pools(osdmap_json)
        ec_profiles = get_erasure_code_profiles(osdmap_json)

        pgmap = get_pgmap(pg_dump)
        pg_stats = get_pg_stats(pgmap)

        old_pgs = parse_test_map_pgs_dump(old_test_map_pgs_dump)
        new_pgs = parse_test_map_pgs_dump(new_test_map_pgs_dump)

        #
        # Pass 1: whole pgs (and their objects) affected by the change.
        #
        diff_pg_count = 0
        total_object_count = 0
        diff_object_count = 0
        for pgid in old_pgs:
            objects = pg_stats[pgid]['stat_sum']['num_objects']
            total_object_count += objects

            if old_pgs[pgid] == new_pgs[pgid]:
                continue

            pool_id = int(pgid.split('.')[0])

            if len(new_pgs[pgid]) < pools[pool_id]['size']:
                print("WARNING: {} will be undersized ({})".format(
                    pgid, new_pgs[pgid]), file=sys.stderr, flush=True)

            # For a pool without an erasure code profile a mere
            # reordering of the same osds is not counted as a change.
            if not pools[pool_id]['erasure_code_profile'] and \
               sorted(old_pgs[pgid]) == sorted(new_pgs[pgid]):
                continue

            if verbose:
                print("{}\t{} -> {}".format(pgid, old_pgs[pgid],
                                            new_pgs[pgid]),
                      file=sys.stderr, flush=True)
            diff_pg_count += 1
            diff_object_count += objects

        print("{}/{} ({:.2f}%) pgs affected".format(
            diff_pg_count, len(old_pgs),
            _percent(diff_pg_count, len(old_pgs))), flush=True)
        print("{}/{} ({:.2f}%) objects affected".format(
            diff_object_count, total_object_count,
            _percent(diff_object_count, total_object_count)), flush=True)

        #
        # Pass 2: individual shards (pg shards, object shards, bytes).
        #
        total_pg_shard_count = 0
        diff_pg_shard_count = 0
        total_object_shard_count = 0
        diff_object_shard_count = 0
        total_bytes = 0
        diff_bytes = 0
        for pgid in old_pgs:
            pool_id = int(pgid.split('.')[0])
            ec_profile = pools[pool_id]['erasure_code_profile']
            if ec_profile:
                k = int(ec_profiles[ec_profile]['k'])
                m = int(ec_profiles[ec_profile]['m'])
            else:
                # No erasure code profile: treat as k=1 plus size-1
                # extra copies, so k + m == pool size.
                k = 1
                m = pools[pool_id]['size'] - 1

            stat_sum = pg_stats[pgid]['stat_sum']
            num_bytes = stat_sum['num_bytes'] + stat_sum['num_omap_bytes']
            objects = stat_sum['num_objects']

            total_pg_shard_count += len(old_pgs[pgid])
            total_object_shard_count += objects * (k + m)
            total_bytes += num_bytes * (k + m) / k

            if old_pgs[pgid] == new_pgs[pgid]:
                continue

            if ec_profile:
                # Position matters here: count every index whose osd
                # changed. zip() stops at the shorter list, so a new
                # mapping with fewer entries cannot raise IndexError.
                moved = sum(1 for old_osd, new_osd
                            in zip(old_pgs[pgid], new_pgs[pgid])
                            if old_osd != new_osd)
            else:
                # Position does not matter: count the old osds that are
                # no longer part of the new mapping.
                moved = sum(1 for osd in old_pgs[pgid]
                            if osd not in new_pgs[pgid])

            if not moved:
                continue

            diff_pg_shard_count += moved
            diff_object_shard_count += objects * moved
            diff_bytes += moved * num_bytes / k

            if verbose:
                print("{}\t{} -> {}".format(pgid, old_pgs[pgid],
                                            new_pgs[pgid]),
                      file=sys.stderr, flush=True)

        print("{}/{} ({:.2f}%) pg shards to move".format(
            diff_pg_shard_count, total_pg_shard_count,
            _percent(diff_pg_shard_count, total_pg_shard_count)), flush=True)
        print("{}/{} ({:.2f}%) pg object shards to move".format(
            diff_object_shard_count, total_object_shard_count,
            _percent(diff_object_shard_count, total_object_shard_count)),
            flush=True)
        print("{}/{} ({:.2f}%) bytes to move".format(
            get_human_readable(int(diff_bytes)),
            get_human_readable(int(total_bytes)),
            _percent(diff_bytes, total_bytes)), flush=True)

def do_export(crushmap_out, osdmap_file=None, compiled=False, verbose=False):
    """Write the crushmap of an osdmap to `crushmap_out`.

    The osdmap is fetched with `ceph osd getmap` unless `osdmap_file`
    is given. The crushmap is extracted with `osdmaptool --export-crush`
    and, unless `compiled` is set, decompiled with `crushtool -d`.

    Raises subprocess.CalledProcessError if an external tool fails.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        if not osdmap_file:
            osdmap_file = os.path.join(tmpdirname, 'osdmap')
            run_cmd('ceph osd getmap -o {}'.format(shlex.quote(osdmap_file)),
                    verbose)

        crushmap_file = crushmap_out if compiled else \
            os.path.join(tmpdirname, 'crushmap')
        run_cmd('osdmaptool {} --export-crush {}'.format(
            shlex.quote(osdmap_file), shlex.quote(crushmap_file)), verbose)
        if not compiled:
            run_cmd('crushtool -d {} -o {}'.format(
                shlex.quote(crushmap_file), shlex.quote(crushmap_out)),
                verbose)

def do_import(crushmap_in, osdmap=None, compiled=False, verbose=False):
    """Install the crushmap from `crushmap_in`.

    Unless `compiled` is set the map is first compiled with
    `crushtool -c`. If `osdmap` is given the result is passed to
    `osdmaptool <osdmap> --import-crush`, otherwise to
    `ceph osd setcrushmap`.

    Raises subprocess.CalledProcessError if an external tool fails.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        if compiled:
            crushmap_file = crushmap_in
        else:
            crushmap_file = os.path.join(tmpdirname, 'crushmap')
            run_cmd('crushtool -c {} -o {}'.format(
                shlex.quote(crushmap_in), shlex.quote(crushmap_file)),
                verbose)
        if osdmap:
            run_cmd('osdmaptool {} --import-crush {}'.format(
                shlex.quote(osdmap), shlex.quote(crushmap_file)), verbose)
        else:
            run_cmd('ceph osd setcrushmap -i {}'.format(
                shlex.quote(crushmap_file)), verbose)

def main():
    """Parse the command line and run the requested command.

    Returns 0 on success and 1 if an external command failed.
    """
    args = parser.parse_args()

    try:
        if args.command == 'compare':
            do_compare(args.crushmap, args.osdmap, args.pg_dump,
                       args.compiled, args.verbose)
        elif args.command == 'export':
            do_export(args.crushmap, args.osdmap, args.compiled,
                      args.verbose)
        elif args.command == 'import':
            do_import(args.crushmap, args.osdmap, args.compiled,
                      args.verbose)
    except subprocess.CalledProcessError as e:
        print("crushdiff: command failed with status {}: {}".format(
            e.returncode, e.cmd), file=sys.stderr, flush=True)
        return 1
    return 0

#
# main
#

if __name__ == '__main__':
    sys.exit(main())