From: Loic Dachary Date: Sat, 13 Sep 2014 08:16:31 +0000 (+0200) Subject: erasure-code: store and compare encoded contents X-Git-Tag: v0.80.8~17^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4f4358708ed3c261ca4027cc9c3dc3f952a99470;p=ceph.git erasure-code: store and compare encoded contents Introduce ceph_erasure_code_non_regression to check and compare how an erasure code plugin encodes and decodes content with a given set of parameters. For instance: ./ceph_erasure_code_non_regression \ --plugin jerasure \ --parameter technique=reed_sol_van \ --parameter k=2 \ --parameter m=2 \ --stripe-width 3181 \ --create \ --check Will create an encoded object (--create) and store it into a directory along with the chunks, one chunk per file. The directory name is derived from the parameters. The content of the object is a random pattern of 37 bytes repeated to fill the object size specified with --stripe-width. The check function (--check) reads the object back from the file, encodes it and compares the result with the content of the chunks read from the files. It also attempts to recover from one or two erasures. Chunks encoded by a given version of Ceph are expected to be encoded exactly in the same way by all Ceph versions going forward. 
http://tracker.ceph.com/issues/9420 Refs: #9420 Signed-off-by: Loic Dachary --- diff --git a/src/.gitignore b/src/.gitignore index 82e077c33d4..126eff4b5b5 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -30,6 +30,7 @@ Makefile /ceph_multi_stress_watch /ceph_erasure_code /ceph_erasure_code_benchmark +/ceph_erasure_code_non_regression /ceph_psim /ceph_radosacl /ceph_rgw_jsonparser diff --git a/src/test/erasure-code/Makefile.am b/src/test/erasure-code/Makefile.am index fdbe003f6ce..c91eef9b244 100644 --- a/src/test/erasure-code/Makefile.am +++ b/src/test/erasure-code/Makefile.am @@ -9,6 +9,14 @@ ceph_erasure_code_benchmark_LDADD += -ldl endif bin_DEBUGPROGRAMS += ceph_erasure_code_benchmark +ceph_erasure_code_non_regression_SOURCES = \ + test/erasure-code/ceph_erasure_code_non_regression.cc +ceph_erasure_code_non_regression_LDADD = $(LIBOSD) $(LIBCOMMON) $(BOOST_PROGRAM_OPTIONS_LIBS) $(CEPH_GLOBAL) +if LINUX +ceph_erasure_code_non_regression_LDADD += -ldl +endif +noinst_PROGRAMS += ceph_erasure_code_non_regression + ceph_erasure_code_SOURCES = \ test/erasure-code/ceph_erasure_code.cc ceph_erasure_code_LDADD = $(LIBOSD) $(LIBCOMMON) $(BOOST_PROGRAM_OPTIONS_LIBS) $(CEPH_GLOBAL) diff --git a/src/test/erasure-code/ceph_erasure_code_non_regression.cc b/src/test/erasure-code/ceph_erasure_code_non_regression.cc new file mode 100644 index 00000000000..c04accf754a --- /dev/null +++ b/src/test/erasure-code/ceph_erasure_code_non_regression.cc @@ -0,0 +1,325 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph distributed storage system + * + * Red Hat (C) 2014 Red Hat + * + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "global/global_context.h" +#include "global/global_init.h" +#include "common/errno.h" +#include "common/ceph_argparse.h" +#include "common/config.h" +#include "erasure-code/ErasureCodePlugin.h" + +namespace po = boost::program_options; +using namespace std; + +class ErasureCodeNonRegression { + unsigned stripe_width; + string plugin; + bool create; + bool check; + string base; + string directory; + map parameters; +public: + int setup(int argc, char** argv); + int run(); + int run_create(); + int run_check(); + int decode_erasures(ErasureCodeInterfaceRef erasure_code, + set erasures, + map chunks); + string content_path(); + string chunk_path(unsigned int chunk); +}; + +int ErasureCodeNonRegression::setup(int argc, char** argv) { + + po::options_description desc("Allowed options"); + desc.add_options() + ("help,h", "produce help message") + ("stripe-width,s", po::value()->default_value(4 * 1024), + "stripe_width, i.e. 
the size of the buffer to be encoded") + ("plugin,p", po::value()->default_value("jerasure"), + "erasure code plugin name") + ("base", po::value()->default_value("."), + "prefix all paths with base") + ("parameter,P", po::value >(), + "parameters") + ("create", "create the erasure coded content in the directory") + ("check", "check the content in the directory matches the chunks and vice versa") + ; + + po::variables_map vm; + po::parsed_options parsed = + po::command_line_parser(argc, argv).options(desc).allow_unregistered().run(); + po::store( + parsed, + vm); + po::notify(vm); + + vector ceph_options, def_args; + vector ceph_option_strings = po::collect_unrecognized( + parsed.options, po::include_positional); + ceph_options.reserve(ceph_option_strings.size()); + for (vector::iterator i = ceph_option_strings.begin(); + i != ceph_option_strings.end(); + ++i) { + ceph_options.push_back(i->c_str()); + } + + global_init( + &def_args, ceph_options, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + g_ceph_context->_conf->apply_changes(NULL); + + if (vm.count("help")) { + cout << desc << std::endl; + return 1; + } + + stripe_width = vm["stripe-width"].as(); + plugin = vm["plugin"].as(); + base = vm["base"].as(); + check = vm.count("check") > 0; + create = vm.count("create") > 0; + + if (!check && !create) { + cerr << "must specifify either --check or --create" << endl; + return 1; + } + + { + stringstream path; + path << base << "/" << "plugin=" << plugin << " stipe-width=" << stripe_width; + directory = path.str(); + } + + if (vm.count("parameter")) { + const vector &p = vm["parameter"].as< vector >(); + for (vector::const_iterator i = p.begin(); + i != p.end(); + ++i) { + std::vector strs; + boost::split(strs, *i, boost::is_any_of("=")); + if (strs.size() != 2) { + cerr << "--parameter " << *i << " ignored because it does not contain exactly one =" << endl; + } else { + 
parameters[strs[0]] = strs[1]; + } + if (strs[0] != "directory") + directory += " " + *i; + } + } + if (parameters.count("directory") == 0) + parameters["directory"] = ".libs"; + + return 0; +} + +int ErasureCodeNonRegression::run() + { + int ret = 0; + if(create && (ret = run_create())) + return ret; + if(check && (ret = run_check())) + return ret; + return ret; +} + +int ErasureCodeNonRegression::run_create() +{ + ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance(); + ErasureCodeInterfaceRef erasure_code; + stringstream messages; + int code = instance.factory(plugin, parameters, &erasure_code, messages); + if (code) { + cerr << messages.str() << endl; + return code; + } + + if (::mkdir(directory.c_str(), 0755)) { + cerr << "mkdir(" << directory << "): " << cpp_strerror(errno) << endl; + return 1; + } + unsigned payload_chunk_size = 37; + string payload; + for (unsigned j = 0; j < payload_chunk_size; ++j) + payload.push_back('a' + (rand() % 26)); + bufferlist in; + for (unsigned j = 0; j < stripe_width; j += payload_chunk_size) + in.append(payload); + if (stripe_width < in.length()) + in.splice(stripe_width, in.length() - stripe_width); + if (in.write_file(content_path().c_str())) + return 1; + set want_to_encode; + for (unsigned int i = 0; i < erasure_code->get_chunk_count(); i++) { + want_to_encode.insert(i); + } + map encoded; + code = erasure_code->encode(want_to_encode, in, &encoded); + if (code) + return code; + for (map::iterator chunk = encoded.begin(); + chunk != encoded.end(); + chunk++) { + if (chunk->second.write_file(chunk_path(chunk->first).c_str())) + return 1; + } + return 0; +} + +int ErasureCodeNonRegression::decode_erasures(ErasureCodeInterfaceRef erasure_code, + set erasures, + map chunks) +{ + map available; + for (map::iterator chunk = chunks.begin(); + chunk != chunks.end(); + ++chunk) { + if (erasures.count(chunk->first) == 0) + available[chunk->first] = chunk->second; + + } + map decoded; + int code = 
erasure_code->decode(erasures, available, &decoded); + if (code) + return code; + for (set::iterator erasure = erasures.begin(); + erasure != erasures.end(); + ++erasure) { + if (!chunks[*erasure].contents_equal(decoded[*erasure])) { + cerr << "chunk " << *erasure << " incorrectly recovered" << endl; + return 1; + } + } + return 0; +} + +int ErasureCodeNonRegression::run_check() +{ + ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance(); + ErasureCodeInterfaceRef erasure_code; + stringstream messages; + int code = instance.factory(plugin, parameters, &erasure_code, messages); + if (code) { + cerr << messages.str() << endl; + return code; + } + string errors; + bufferlist in; + if (in.read_file(content_path().c_str(), &errors)) { + cerr << errors << endl; + return 1; + } + set want_to_encode; + for (unsigned int i = 0; i < erasure_code->get_chunk_count(); i++) { + want_to_encode.insert(i); + } + + map encoded; + code = erasure_code->encode(want_to_encode, in, &encoded); + if (code) + return code; + + for (map::iterator chunk = encoded.begin(); + chunk != encoded.end(); + chunk++) { + bufferlist existing; + if (existing.read_file(chunk_path(chunk->first).c_str(), &errors)) { + cerr << errors << endl; + return 1; + } + bufferlist &old = chunk->second; + if (existing.length() != old.length() || + memcmp(existing.c_str(), old.c_str(), old.length())) { + cerr << "chunk " << chunk->first << " encodes differently" << endl; + return 1; + } + } + + // erasing a single chunk is likely to use a specific code path in every plugin + set erasures; + erasures.clear(); + erasures.insert(0); + code = decode_erasures(erasure_code, erasures, encoded); + if (code) + return code; + + if (erasure_code->get_chunk_count() - erasure_code->get_data_chunk_count() > 1) { + // erasing two chunks is likely to be the general case + erasures.clear(); + erasures.insert(0); + erasures.insert(erasure_code->get_chunk_count() - 1); + code = decode_erasures(erasure_code, erasures, 
encoded); + if (code) + return code; + } + + return 0; +} + +string ErasureCodeNonRegression::content_path() +{ + stringstream path; + path << directory << "/content"; + return path.str(); +} + +string ErasureCodeNonRegression::chunk_path(unsigned int chunk) +{ + stringstream path; + path << directory << "/" << chunk; + return path.str(); +} + +int main(int argc, char** argv) { + ErasureCodeNonRegression non_regression; + int err = non_regression.setup(argc, argv); + if (err) + return err; + return non_regression.run(); +} + +/* + * Local Variables: + * compile-command: "cd ../.. ; make -j4 && + * make ceph_erasure_code_non_regression && + * libtool --mode=execute valgrind --tool=memcheck --leak-check=full \ + * ./ceph_erasure_code_non_regression \ + * --plugin jerasure \ + * --parameter directory=.libs \ + * --parameter technique=reed_sol_van \ + * --parameter k=2 \ + * --parameter m=2 \ + * --directory /tmp/ceph_erasure_code_non_regression \ + * --stripe-width 3181 \ + * --create \ + * --check + * " + * End: + */