From b6e820eb43c83cfb9abebb0a6d877ddb8adc9f35 Mon Sep 17 00:00:00 2001 From: myoungwon oh Date: Wed, 22 Jun 2022 15:54:05 +0900 Subject: [PATCH] tool/ceph-dedup-tool: replace a way to print options with boost program options Signed-off-by: Myoungwon Oh --- src/tools/ceph_dedup_tool.cc | 486 +++++++++++++++-------------------- 1 file changed, 212 insertions(+), 274 deletions(-) diff --git a/src/tools/ceph_dedup_tool.cc b/src/tools/ceph_dedup_tool.cc index ba2a9f518c8..55e645fa79c 100644 --- a/src/tools/ceph_dedup_tool.cc +++ b/src/tools/ceph_dedup_tool.cc @@ -49,7 +49,11 @@ #include "global/signal_handler.h" #include "common/CDC.h" +#include +#include + using namespace std; +namespace po = boost::program_options; struct EstimateResult { std::unique_ptr cdc; @@ -131,34 +135,53 @@ unsigned default_max_thread = 2; int32_t default_report_period = 10; ceph::mutex glock = ceph::make_mutex("glock"); -void usage() -{ - cout << -"usage: \n" -" ceph-dedup-tool \n" -" [--op estimate --pool POOL --chunk-size CHUNK_SIZE --chunk-algorithm ALGO --fingerprint-algorithm FP_ALGO] \n" -" [--op chunk-scrub --op chunk-scrub --chunk-pool POOL] \n" -" [--op chunk-get-ref --chunk-pool POOL --object OID --target-ref OID --target-ref-pool-id POOL_ID] \n" -" [--op chunk-put-ref --chunk-pool POOL --object OID --target-ref OID --target-ref-pool-id POOL_ID] \n" -" [--op chunk-repair --chunk-pool POOL --object OID --target-ref OID --target-ref-pool-id POOL_ID] \n" -" [--op dump-chunk-refs --chunk-pool POOL --object OID] \n" -" [--op chunk-dedup --pool POOL --object OID --chunk-pool POOL --fingerprint-algorithm FP --source-off OFFSET --source-length LENGTH] \n" -" [--op object-dedup --pool POOL --object OID --chunk-pool POOL --fingerprint-algorithm FP --dedup-cdc-chunk-size CHUNK_SIZE] \n" - << std::endl; - cout << "optional arguments: " << std::endl; - cout << " --object " << std::endl; - cout << " --chunk-size chunk-size (byte) " << std::endl; - cout << " --chunk-algorithm " << std::endl; - cout << " --fingerprint-algorithm " << std::endl; - cout << " --chunk-pool " << std::endl; - cout << " --max-thread " << std::endl; - cout << " --report-period " << std::endl; - cout << " --max-seconds " << std::endl; - cout << " --max-read-size " << std::endl; - cout << "explanations: " << std::endl; - cout << " chunk-dedup performs deduplication using a chunk generated by given source" << std::endl; - cout << " offset and length. object-dedup deduplicates the entire object, not a chunk" << std::endl; - exit(1); +po::options_description make_usage() { + po::options_description desc("Usage"); + desc.add_options() + ("help,h", ": produce help message") + ("op estimate --pool --chunk-size --chunk-algorithm --fingerprint-algorithm ", + ": estimate how many chunks are redundant") + ("op chunk-scrub --chunk-pool ", + ": perform chunk scrub") + ("op chunk-get-ref --chunk-pool --object --target-ref --target-ref-pool-id ", + ": get chunk object's reference") + ("op chunk-put-ref --chunk-pool --object --target-ref --target-ref-pool-id ", + ": put chunk object's reference") + ("op chunk-repair --chunk-pool --object --target-ref --target-ref-pool-id ", + ": fix mismatched references") + ("op dump-chunk-refs --chunk-pool --object ", + ": dump chunk object's references") + ("op chunk-dedup --pool --object --chunk-pool --fingerprint-algorithm --source-off --source-length ", + ": perform a chunk dedup---deduplicate only a chunk, which is a part of object.") + ("op object-dedup --pool --object --chunk-pool --fingerprint-algorithm --dedup-cdc-chunk-size [--snap]", + ": perform a object dedup---deduplicate the entire object, not a chunk. Related snapshots are also deduplicated if --snap is given") + ; + po::options_description op_desc("Opational arguments"); + op_desc.add_options() + ("op", po::value(), ": estimate|chunk-scrub|chunk-get-ref|chunk-put-ref|chunk-repair|dump-chunk-refs|chunk-dedup|object-dedup") + ("target-ref", po::value(), ": set target object") + ("target-ref-pool-id", po::value(), ": set target pool id") + ("object", po::value(), ": set object name") + ("chunk-size", po::value(), ": chunk size (byte)") + ("chunk-algorithm", po::value(), ": , set chunk-algorithm") + ("fingerprint-algorithm", po::value(), ": , set fingerprint-algorithm") + ("chunk-pool", po::value(), ": set chunk pool name") + ("max-thread", po::value(), ": set max thread") + ("report-period", po::value(), ": set report-period") + ("max-seconds", po::value(), ": set max runtime") + ("max-read-size", po::value(), ": set max read size") + ("pool", po::value(), ": set pool name") + ("min-chunk-size", po::value(), ": min chunk size (byte)") + ("max-chunk-size", po::value(), ": max chunk size (byte)") + ("source-off", po::value(), ": set source offset") + ("source-length", po::value(), ": set source length") + ("dedup-cdc-chunk-size", po::value(), ": set dedup chunk size for cdc") + ("snap", ": deduplciate snapshotted object") + ("debug", ": enable debug") + ("pgid", ": set pgid") + ; + desc.add(op_desc); + return desc; } template @@ -517,15 +540,94 @@ void ChunkScrub::print_status(Formatter *f, ostream &out) } } -int estimate_dedup_ratio(const std::map < std::string, std::string > &opts, - std::vector &nargs) +string get_opts_pool_name(const po::variables_map &opts) { + if (opts.count("pool")) { + return opts["pool"].as(); + } + cerr << "must specify pool name" << std::endl; + exit(1); +} + +string get_opts_chunk_algo(const po::variables_map &opts) { + if (opts.count("chunk-algorithm")) { + string chunk_algo = opts["chunk-algorithm"].as(); + if (!CDC::create(chunk_algo, 12)) { + cerr << "unrecognized chunk-algorithm " << chunk_algo << std::endl; + exit(1); + } + return chunk_algo; + } + cerr << "must specify chunk-algorithm" << std::endl; + exit(1); +} + +string get_opts_fp_algo(const po::variables_map &opts) { + if (opts.count("fingerprint-algorithm")) { + string fp_algo = opts["fingerprint-algorithm"].as(); + if (fp_algo != "sha1" + && fp_algo != "sha256" && fp_algo != "sha512") { + cerr << "unrecognized fingerprint-algorithm " << fp_algo << std::endl; + exit(1); + } + return fp_algo; + } + cout << "SHA1 is set as fingerprint algorithm by default" << std::endl; + return string("sha1"); +} + +string get_opts_op_name(const po::variables_map &opts) { + if (opts.count("op")) { + return opts["op"].as(); + } else { + cerr << "must specify op" << std::endl; + exit(1); + } +} + +string get_opts_chunk_pool(const po::variables_map &opts) { + if (opts.count("chunk-pool")) { + return opts["chunk-pool"].as(); + } else { + cerr << "must specify --chunk-pool" << std::endl; + exit(1); + } +} + +string get_opts_object_name(const po::variables_map &opts) { + if (opts.count("object")) { + return opts["object"].as(); + } else { + cerr << "must specify object" << std::endl; + exit(1); + } +} + +int get_opts_max_thread(const po::variables_map &opts) { + if (opts.count("max-thread")) { + return opts["max-thread"].as(); + } else { + cout << "2 is set as the number of threads by default" << std::endl; + return 2; + } +} + +int get_opts_report_period(const po::variables_map &opts) { + if (opts.count("report-period")) { + return opts["report-period"].as(); + } else { + cout << "10 seconds is set as report period by default" << std::endl; + return 10; + } +} + +int estimate_dedup_ratio(const po::variables_map &opts) { Rados rados; IoCtx io_ctx; std::string chunk_algo = "fastcdc"; string fp_algo = "sha1"; string pool_name; - uint64_t chunk_size = 0; + uint64_t chunk_size = 8192; uint64_t min_chunk_size = 8192; uint64_t max_chunk_size = 4*1024*1024; unsigned max_thread = default_max_thread; @@ -541,13 +643,9 @@ int estimate_dedup_ratio(const std::map < std::string, std::string > &opts, list pool_names; map stats; - i = opts.find("pool"); - if (i != opts.end()) { - pool_name = i->second.c_str(); - } - i = opts.find("chunk-algorithm"); - if (i != opts.end()) { - chunk_algo = i->second.c_str(); + pool_name = get_opts_pool_name(opts); + if (opts.count("chunk-algorithm")) { + chunk_algo = opts["chunk-algorithm"].as(); if (!CDC::create(chunk_algo, 12)) { cerr << "unrecognized chunk-algorithm " << chunk_algo << std::endl; exit(1); @@ -556,69 +654,38 @@ int estimate_dedup_ratio(const std::map < std::string, std::string > &opts, cerr << "must specify chunk-algorithm" << std::endl; exit(1); } - - i = opts.find("fingerprint-algorithm"); - if (i != opts.end()) { - fp_algo = i->second.c_str(); - if (fp_algo != "sha1" - && fp_algo != "sha256" && fp_algo != "sha512") { - cerr << "unrecognized fingerprint-algorithm " << fp_algo << std::endl; - exit(1); - } - } - - i = opts.find("chunk-size"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &chunk_size)) { - return -EINVAL; - } + fp_algo = get_opts_fp_algo(opts); + if (opts.count("chunk-size")) { + chunk_size = opts["chunk-size"].as(); + } else { + cout << "8192 is set as chunk size by default" << std::endl; } - - i = opts.find("min-chunk-size"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &min_chunk_size)) { - return -EINVAL; - } + if (opts.count("min-chunk-size")) { + chunk_size = opts["min-chunk-size"].as(); + } else { + cout << "8192 is set as min chunk size by default" << std::endl; } - i = opts.find("max-chunk-size"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &max_chunk_size)) { - return -EINVAL; - } + if (opts.count("max-chunk-size")) { + chunk_size = opts["max-chunk-size"].as(); + } else { + cout << "4MB is set as max chunk size by default" << std::endl; } - - i = opts.find("max-thread"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &max_thread)) { - return -EINVAL; - } - } - - i = opts.find("report-period"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &report_period)) { - return -EINVAL; - } + max_thread = get_opts_max_thread(opts); + report_period = get_opts_report_period(opts); + if (opts.count("max-seconds")) { + max_seconds = opts["max-seconds"].as(); + } else { + cout << "max seconds is not set" << std::endl; } - i = opts.find("max-seconds"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &max_seconds)) { - return -EINVAL; - } + if (opts.count("max-read-size")) { + max_read_size = opts["max-read-size"].as(); + } else { + cout << default_op_size << " is set as max-read-size by default" << std::endl; } - i = opts.find("max-read-size"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &max_read_size)) { - return -EINVAL; - } - } - i = opts.find("debug"); - if (i != opts.end()) { + if (opts.count("debug")) { debug = true; } - - i = opts.find("pgid"); - boost::optional pgid(i != opts.end(), pg_t()); + boost::optional pgid(opts.count("pgid"), pg_t()); ret = rados.init_with_context(g_ceph_context); if (ret < 0) { @@ -715,8 +782,7 @@ static void print_chunk_scrub() cout << " Damaged object : " << damaged_objects << std::endl; } -int chunk_scrub_common(const std::map < std::string, std::string > &opts, - std::vector &nargs) +int chunk_scrub_common(const po::variables_map &opts) { Rados rados; IoCtx io_ctx, chunk_io_ctx; @@ -732,35 +798,11 @@ int chunk_scrub_common(const std::map < std::string, std::string > &opts, list pool_names; map stats; - i = opts.find("op_name"); - if (i != opts.end()) { - op_name= i->second.c_str(); - } else { - cerr << "must specify op" << std::endl; - exit(1); - } - - i = opts.find("chunk-pool"); - if (i != opts.end()) { - chunk_pool_name = i->second.c_str(); - } else { - cerr << "must specify --chunk-pool" << std::endl; - exit(1); - } - i = opts.find("max-thread"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &max_thread)) { - return -EINVAL; - } - } - i = opts.find("report-period"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &report_period)) { - return -EINVAL; - } - } - i = opts.find("pgid"); - boost::optional pgid(i != opts.end(), pg_t()); + op_name = get_opts_op_name(opts); + chunk_pool_name = get_opts_chunk_pool(opts); + max_thread = get_opts_max_thread(opts); + report_period = get_opts_report_period(opts); + boost::optional pgid(opts.count("pgid"), pg_t()); ret = rados.init_with_context(g_ceph_context); if (ret < 0) { @@ -786,25 +828,15 @@ int chunk_scrub_common(const std::map < std::string, std::string > &opts, op_name == "chunk-repair") { string target_object_name; uint64_t pool_id; - i = opts.find("object"); - if (i != opts.end()) { - object_name = i->second.c_str(); - } else { - cerr << "must specify object" << std::endl; - exit(1); - } - i = opts.find("target-ref"); - if (i != opts.end()) { - target_object_name = i->second.c_str(); + object_name = get_opts_object_name(opts); + if (opts.count("target-ref")) { + target_object_name = opts["target-ref"].as(); } else { cerr << "must specify target ref" << std::endl; exit(1); } - i = opts.find("target-ref-pool-id"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &pool_id)) { - return -EINVAL; - } + if (opts.count("target-ref-pool-id")) { + pool_id = opts["target-ref-pool-id"].as(); } else { cerr << "must specify target-ref-pool-id" << std::endl; exit(1); @@ -897,13 +929,7 @@ int chunk_scrub_common(const std::map < std::string, std::string > &opts, return ret; } else if (op_name == "dump-chunk-refs") { - i = opts.find("object"); - if (i != opts.end()) { - object_name = i->second.c_str(); - } else { - cerr << "must specify object" << std::endl; - exit(1); - } + object_name = get_opts_object_name(opts); bufferlist t; ret = chunk_io_ctx.getxattr(object_name, CHUNK_REFCOUNT_ATTR, t); if (ret < 0) { @@ -968,8 +994,7 @@ string make_pool_str(string pool, string var, int val) return make_pool_str(pool, var, stringify(val)); } -int make_dedup_object(const std::map < std::string, std::string > &opts, - std::vector &nargs) +int make_dedup_object(const po::variables_map &opts) { Rados rados; IoCtx io_ctx, chunk_io_ctx; @@ -977,37 +1002,11 @@ int make_dedup_object(const std::map < std::string, std::string > &opts, int ret; std::map::const_iterator i; - i = opts.find("op_name"); - if (i != opts.end()) { - op_name = i->second; - } else { - cerr << "must specify op" << std::endl; - exit(1); - } - i = opts.find("pool"); - if (i != opts.end()) { - pool_name = i->second; - } else { - cerr << "must specify --pool" << std::endl; - exit(1); - } - i = opts.find("object"); - if (i != opts.end()) { - object_name = i->second; - } else { - cerr << "must specify object" << std::endl; - exit(1); - } - - i = opts.find("chunk-pool"); - if (i != opts.end()) { - chunk_pool_name = i->second; - } else { - cerr << "must specify --chunk-pool" << std::endl; - exit(1); - } - i = opts.find("pgid"); - boost::optional pgid(i != opts.end(), pg_t()); + op_name = get_opts_op_name(opts); + pool_name = get_opts_pool_name(opts); + object_name = get_opts_object_name(opts); + chunk_pool_name = get_opts_chunk_pool(opts); + boost::optional pgid(opts.count("pgid"), pg_t()); ret = rados.init_with_context(g_ceph_context); if (ret < 0) { @@ -1034,35 +1033,21 @@ int make_dedup_object(const std::map < std::string, std::string > &opts, << cpp_strerror(ret) << std::endl; goto out; } - i = opts.find("fingerprint-algorithm"); - if (i != opts.end()) { - fp_algo = i->second.c_str(); - if (fp_algo != "sha1" - && fp_algo != "sha256" && fp_algo != "sha512") { - cerr << "unrecognized fingerprint-algorithm " << fp_algo << std::endl; - exit(1); - } - } + fp_algo = get_opts_fp_algo(opts); if (op_name == "chunk-dedup") { uint64_t offset, length; string chunk_object; - i = opts.find("source-off"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &offset)) { - return -EINVAL; - } + if (opts.count("source-off")) { + offset = opts["source-off"].as(); } else { cerr << "must specify --source-off" << std::endl; exit(1); } - i = opts.find("source-length"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &length)) { - return -EINVAL; - } + if (opts.count("source-length")) { + length = opts["source-length"].as(); } else { - cerr << "must specify --source-off" << std::endl; + cerr << "must specify --source-length" << std::endl; exit(1); } // 1. make a copy from manifest object to chunk object @@ -1099,17 +1084,15 @@ int make_dedup_object(const std::map < std::string, std::string > &opts, goto out; } } else if (op_name == "object-dedup") { - unsigned chunk_size; + unsigned chunk_size = 0; bool snap = false; - i = opts.find("dedup-cdc-chunk-size"); - if (i != opts.end()) { - if (rados_sistrtoll(i, &chunk_size)) { - cerr << "unrecognized dedup_cdc_chunk_size " << chunk_size << std::endl; - return -EINVAL; - } + if (opts.count("dedup-cdc-chunk-size")) { + chunk_size = opts["dedup-cdc-chunk-size"].as(); + } else { + cerr << "must specify --dedup-cdc-chunk-size" << std::endl; + exit(1); } - i = opts.find("snap"); - if (i != opts.end()) { + if (opts.count("snap")) { snap = true; } @@ -1242,86 +1225,41 @@ int main(int argc, const char **argv) cerr << argv[0] << ": -h or --help for usage" << std::endl; exit(1); } - if (ceph_argparse_need_usage(args)) { - usage(); + + po::variables_map opts; + po::positional_options_description p; + p.add("command", 1); + po::options_description desc = make_usage(); + try { + po::parsed_options parsed = + po::command_line_parser(argc, argv).options(desc).positional(p).allow_unregistered().run(); + po::store(parsed, opts); + po::notify(opts); + } catch(po::error &e) { + std::cerr << e.what() << std::endl; + return 1; + } + if (opts.count("help") || opts.count("h")) { + cout<< desc << std::endl; exit(0); } - std::string fn; - string op_name; - auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); init_async_signal_handler(); register_async_signal_handler_oneshot(SIGINT, handle_signal); register_async_signal_handler_oneshot(SIGTERM, handle_signal); - std::map < std::string, std::string > opts; - std::string val; - std::vector::iterator i; - for (i = args.begin(); i != args.end(); ) { - if (ceph_argparse_double_dash(args, i)) { - break; - } else if (ceph_argparse_witharg(args, i, &val, "--op", (char*)NULL)) { - opts["op_name"] = val; - op_name = val; - } else if (ceph_argparse_witharg(args, i, &val, "--pool", (char*)NULL)) { - opts["pool"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--object", (char*)NULL)) { - opts["object"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--chunk-algorithm", (char*)NULL)) { - opts["chunk-algorithm"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--chunk-size", (char*)NULL)) { - opts["chunk-size"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--fingerprint-algorithm", (char*)NULL)) { - opts["fingerprint-algorithm"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--chunk-pool", (char*)NULL)) { - opts["chunk-pool"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--target-ref", (char*)NULL)) { - opts["target-ref"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--target-ref-pool-id", (char*)NULL)) { - opts["target-ref-pool-id"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--max-thread", (char*)NULL)) { - opts["max-thread"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--report-period", (char*)NULL)) { - opts["report-period"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--max-read-size", (char*)NULL)) { - opts["max-seconds"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--max-seconds", (char*)NULL)) { - opts["max-seconds"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--min-chunk-size", (char*)NULL)) { - opts["min-chunk-size"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--max-chunk-size", (char*)NULL)) { - opts["max-chunk-size"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--chunk-object", (char*)NULL)) { - opts["chunk-object"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--source-off", (char*)NULL)) { - opts["source-off"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--source-length", (char*)NULL)) { - opts["source-length"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--dedup-cdc-chunk-size", (char*)NULL)) { - opts["dedup-cdc-chunk-size"] = val; - } else if (ceph_argparse_flag(args, i, "--snap", (char*)NULL)) { - opts["snap"] = "true"; - } else if (ceph_argparse_flag(args, i, "--debug", (char*)NULL)) { - opts["debug"] = "true"; - } else { - if (val[0] == '-') { - cerr << "unrecognized option " << val << std::endl; - exit(1); - } - ++i; - } - } + string op_name = get_opts_op_name(opts); if (op_name == "estimate") { - return estimate_dedup_ratio(opts, args); + return estimate_dedup_ratio(opts); } else if (op_name == "chunk-scrub" || op_name == "chunk-get-ref" || op_name == "chunk-put-ref" || op_name == "chunk-repair" || op_name == "dump-chunk-refs") { - return chunk_scrub_common(opts, args); + return chunk_scrub_common(opts); } else if (op_name == "chunk-dedup" || op_name == "object-dedup") { /* @@ -1334,7 +1272,7 @@ int main(int argc, const char **argv) * perform deduplication on the entire object, not a chunk. * */ - return make_dedup_object(opts, args); + return make_dedup_object(opts); } else { cerr << "unrecognized op " << op_name << std::endl; exit(1); -- 2.39.5