From: Loic Dachary Date: Fri, 29 Aug 2014 21:58:34 +0000 (+0200) Subject: erasure-code: lowercase LRC plugin name X-Git-Tag: v0.86~161^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F2358%2Fhead;p=ceph.git erasure-code: lowercase LRC plugin name So that all erasure code plugins are lowercase. Signed-off-by: Loic Dachary --- diff --git a/doc/rados/operations/erasure-code-lrc.rst b/doc/rados/operations/erasure-code-lrc.rst index 98cc42aa73ec..17500c85b57a 100644 --- a/doc/rados/operations/erasure-code-lrc.rst +++ b/doc/rados/operations/erasure-code-lrc.rst @@ -8,8 +8,8 @@ from all the others. For instance if *jerasure* is configured with *k=8* and *m=4*, losing one OSD requires reading from the eleven others to repair. -The *LRC* erasure code plugin creates local parity chunks to be able -to recover using less OSDs. For instance if *LRC* is configured with +The *lrc* erasure code plugin creates local parity chunks to be able +to recover using less OSDs. For instance if *lrc* is configured with *k=8*, *m=4* and *l=4*, it will create an additional parity chunk for every four OSDs. When a single OSD is lost, it can be recovered with only four OSDs instead of eleven. 
@@ -25,7 +25,7 @@ connected to the same switch, reduced bandwidth usage can actually be observed.:: $ ceph osd erasure-code-profile set LRCprofile \ - plugin=LRC \ + plugin=lrc \ k=4 m=2 l=3 \ ruleset-failure-domain=host $ ceph osd pool create lrcpool 12 12 erasure LRCprofile @@ -38,20 +38,20 @@ In Firefly the reduced bandwidth will only be observed if the primary OSD is in the same rack as the lost chunk.:: $ ceph osd erasure-code-profile set LRCprofile \ - plugin=LRC \ + plugin=lrc \ k=4 m=2 l=3 \ ruleset-locality=rack \ ruleset-failure-domain=host $ ceph osd pool create lrcpool 12 12 erasure LRCprofile -Create an LRC profile +Create an lrc profile ===================== -To create a new LRC erasure code profile:: +To create a new lrc erasure code profile:: ceph osd erasure-code-profile set {name} \ - plugin=LRC \ + plugin=lrc \ k={data-chunks} \ m={coding-chunks} \ l={locality} \ @@ -155,7 +155,7 @@ define locality sets, for instance datacenters and racks into datacenters. The **k/m/l** are implemented by generating a low level configuration. -The *LRC* erasure code plugin recursively applies erasure code +The *lrc* erasure code plugin recursively applies erasure code techniques so that recovering from the loss of some chunks only requires a subset of the available chunks, most of the time. @@ -182,7 +182,7 @@ implies *K=2*, the *c* implies *M=1* and the *jerasure* plugin is used by default.:: $ ceph osd erasure-code-profile set LRCprofile \ - plugin=LRC \ + plugin=lrc \ mapping=DD_ \ layers='[ [ "DDc", "" ] ]' $ ceph osd pool create lrcpool 12 12 erasure LRCprofile @@ -196,7 +196,7 @@ observed. 
It is equivalent to **k=4**, **m=2** and **l=3** although the layout of the chunks is different:: $ ceph osd erasure-code-profile set LRCprofile \ - plugin=LRC \ + plugin=lrc \ mapping=__DD__DD \ layers='[ [ "_cDD_cDD", "" ], @@ -213,7 +213,7 @@ In Firefly the reduced bandwidth will only be observed if the primary OSD is in the same rack as the lost chunk.:: $ ceph osd erasure-code-profile set LRCprofile \ - plugin=LRC \ + plugin=lrc \ mapping=__DD__DD \ layers='[ [ "_cDD_cDD", "" ], diff --git a/src/erasure-code/LRC/ErasureCodeLRC.cc b/src/erasure-code/LRC/ErasureCodeLRC.cc deleted file mode 100644 index 3c69fe02f019..000000000000 --- a/src/erasure-code/LRC/ErasureCodeLRC.cc +++ /dev/null @@ -1,834 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2014 Cloudwatt - * - * Author: Loic Dachary - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - */ - -#include -#include - -#include "include/str_map.h" -#include "common/debug.h" -#include "crush/CrushWrapper.h" -#include "osd/osd_types.h" -#include "include/stringify.h" -#include "erasure-code/ErasureCodePlugin.h" -#include "json_spirit/json_spirit_writer.h" - -#include "ErasureCodeLRC.h" - -// re-include our assert to clobber boost's -#include "include/assert.h" - -#define dout_subsys ceph_subsys_osd -#undef dout_prefix -#define dout_prefix _prefix(_dout) - -static ostream& _prefix(std::ostream* _dout) -{ - return *_dout << "ErasureCodeLRC: "; -} - -int ErasureCodeLRC::create_ruleset(const string &name, - CrushWrapper &crush, - ostream *ss) const -{ - if (crush.rule_exists(name)) { - *ss << "rule " << name << " exists"; - return -EEXIST; - } - if (!crush.name_exists(ruleset_root)) { - *ss << "root item " << ruleset_root << " does not exist"; - return -ENOENT; - } - int root = crush.get_item_id(ruleset_root); - - int ruleset = 0; - for (int i = 0; i < crush.get_max_rules(); i++) { - if (crush.rule_exists(i) && - crush.get_rule_mask_ruleset(i) >= ruleset) { - ruleset = crush.get_rule_mask_ruleset(i) + 1; - } - } - - int steps = 3 + ruleset_steps.size(); - int min_rep = 3; - int max_rep = 30; - crush_rule *rule = crush_make_rule(steps, ruleset, - pg_pool_t::TYPE_ERASURE, - min_rep, max_rep); - assert(rule); - int step = 0; - crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); - crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0); - // [ [ "choose", "rack", 2 ], - // [ "chooseleaf", "host", 5 ] ] - for (vector::const_iterator i = ruleset_steps.begin(); - i != ruleset_steps.end(); - ++i) { - int op = i->op == "chooseleaf" ? 
- CRUSH_RULE_CHOOSELEAF_INDEP : CRUSH_RULE_CHOOSE_INDEP; - int type = crush.get_type_id(i->type); - if (type < 0) { - *ss << "unknown crush type " << i->type; - return -EINVAL; - } - crush_rule_set_step(rule, step++, op, i->n, type); - } - crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); - int rno = crush_add_rule(crush.crush, rule, -1); - crush.set_rule_name(rno, name); - - return ruleset; -} - -int ErasureCodeLRC::layers_description(const map ¶meters, - json_spirit::mArray *description, - ostream *ss) const -{ - if (parameters.count("layers") == 0) { - *ss << "could not find 'layers' in " << parameters << std::endl; - return ERROR_LRC_DESCRIPTION; - } - string str = parameters.find("layers")->second; - try { - json_spirit::mValue json; - json_spirit::read_or_throw(str, json); - - if (json.type() != json_spirit::array_type) { - *ss << "layers='" << str - << "' must be a JSON array but is of type " - << json.type() << " instead" << std::endl; - return ERROR_LRC_ARRAY; - } - *description = json.get_array(); - } catch (json_spirit::Error_position &e) { - *ss << "failed to parse layers='" << str << "'" - << " at line " << e.line_ << ", column " << e.column_ - << " : " << e.reason_ << std::endl; - return ERROR_LRC_PARSE_JSON; - } - return 0; -} - -int ErasureCodeLRC::layers_parse(string description_string, - json_spirit::mArray description, - ostream *ss) -{ - int position = 0; - for (vector::iterator i = description.begin(); - i != description.end(); - i++, position++) { - if (i->type() != json_spirit::array_type) { - stringstream json_string; - json_spirit::write(*i, json_string); - *ss << "each element of the array " - << description_string << " must be a JSON array but " - << json_string.str() << " at position " << position - << " is of type " << i->type() << " instead" << std::endl; - return ERROR_LRC_ARRAY; - } - json_spirit::mArray layer_json = i->get_array(); - map parameters; - int index = 0; - for (vector::iterator j = layer_json.begin(); - j != 
layer_json.end(); - ++j, ++index) { - if (index == 0) { - if (j->type() != json_spirit::str_type) { - stringstream element; - json_spirit::write(*j, element); - *ss << "the first element of the entry " - << element.str() << " (first is zero) " - << position << " in " << description_string - << " is of type " << (*j).type() << " instead of string" << std::endl; - return ERROR_LRC_STR; - } - layers.push_back(Layer(j->get_str())); - Layer &layer = layers.back(); - layer.chunks_map = j->get_str(); - } else if(index == 1) { - Layer &layer = layers.back(); - if (j->type() != json_spirit::str_type && - j->type() != json_spirit::obj_type) { - stringstream element; - json_spirit::write(*j, element); - *ss << "the second element of the entry " - << element.str() << " (first is zero) " - << position << " in " << description_string - << " is of type " << (*j).type() << " instead of string or object" - << std::endl; - return ERROR_LRC_CONFIG_OPTIONS; - } - if (j->type() == json_spirit::str_type) { - int err = get_json_str_map(j->get_str(), *ss, &layer.parameters); - if (err) - return err; - } else if (j->type() == json_spirit::obj_type) { - json_spirit::mObject o = j->get_obj(); - - for (map::iterator i = o.begin(); - i != o.end(); - ++i) { - layer.parameters[i->first] = i->second.get_str(); - } - } - } else { - // ignore trailing elements - } - } - } - return 0; -} - -int ErasureCodeLRC::layers_init() -{ - ErasureCodePluginRegistry ®istry = ErasureCodePluginRegistry::instance(); - int err; - for (unsigned int i = 0; i < layers.size(); i++) { - Layer &layer = layers[i]; - int position = 0; - for(std::string::iterator it = layer.chunks_map.begin(); - it != layer.chunks_map.end(); - ++it) { - if (*it == 'D') - layer.data.push_back(position); - if (*it == 'c') - layer.coding.push_back(position); - if (*it == 'c' || *it == 'D') - layer.chunks_as_set.insert(position); - position++; - } - layer.chunks = layer.data; - layer.chunks.insert(layer.chunks.end(), - layer.coding.begin(), 
layer.coding.end()); - if (layer.parameters.find("k") == layer.parameters.end()) - layer.parameters["k"] = stringify(layer.data.size()); - if (layer.parameters.find("m") == layer.parameters.end()) - layer.parameters["m"] = stringify(layer.coding.size()); - if (layer.parameters.find("plugin") == layer.parameters.end()) - layer.parameters["plugin"] = "jerasure"; - if (layer.parameters.find("technique") == layer.parameters.end()) - layer.parameters["technique"] = "reed_sol_van"; - if (layer.parameters.find("directory") == layer.parameters.end()) - layer.parameters["directory"] = directory; - stringstream ss; - err = registry.factory(layer.parameters["plugin"], - layer.parameters, - &layer.erasure_code, - ss); - if (err) { - derr << ss.str() << dendl; - return err; - } - } - return 0; -} - -int ErasureCodeLRC::layers_sanity_checks(string description_string, - ostream *ss) const -{ - int position = 0; - - if (layers.size() < 1) { - *ss << "layers parameter has " << layers.size() - << " which is less than the minimum of one. " - << description_string << std::endl; - return ERROR_LRC_LAYERS_COUNT; - } - for (vector::const_iterator layer = layers.begin(); - layer != layers.end(); - ++layer) { - if (chunk_count != layer->chunks_map.length()) { - *ss << "the first element of the array at position " - << position << " (starting from zero) " - << " is the string '" << layer->chunks_map - << " found in the layers parameter " - << description_string << ". 
It is expected to be " - << chunk_count << " characters long but is " - << layer->chunks_map.length() << " characters long instead " - << std::endl; - return ERROR_LRC_MAPPING_SIZE; - } - } - return 0; -} - -int ErasureCodeLRC::parse(const map ¶meters, - ostream *ss) -{ - int r = ErasureCode::parse(parameters, ss); - if (r) - return r; - - if (parameters.count("directory") != 0) - directory = parameters.find("directory")->second; - - return parse_ruleset(parameters, ss); -} - -int ErasureCodeLRC::parse_kml(map ¶meters, - ostream *ss) -{ - int err = ErasureCode::parse(parameters, ss); - const int DEFAULT = -1; - int k, m, l; - err |= to_int("k", parameters, &k, DEFAULT, ss); - err |= to_int("m", parameters, &m, DEFAULT, ss); - err |= to_int("l", parameters, &l, DEFAULT, ss); - - if (k == DEFAULT && m == DEFAULT && l == DEFAULT) - return 0; - - if ((k != DEFAULT || m != DEFAULT || l != DEFAULT) && - (k == DEFAULT || m == DEFAULT || l == DEFAULT)) { - *ss << "All of k, m, l must be set or none of them in " - << parameters << std::endl; - return ERROR_LRC_ALL_OR_NOTHING; - } - - const char *generated[] = { "mapping", - "layers", - "ruleset-steps" }; - - for (int i = 0; i < 3; i++) { - if (parameters.count(generated[i])) { - *ss << "The " << generated[i] << " parameter cannot be set " - << "when k, m, l are set in " << parameters << std::endl; - return ERROR_LRC_GENERATED; - } - } - - if ((k + m) % l) { - *ss << "k + m must be a multiple of l in " - << parameters << std::endl; - return ERROR_LRC_K_M_MODULO; - } - - int local_group_count = (k + m) / l; - - if (k % local_group_count) { - *ss << "k must be a multiple of (k + m) / l in " - << parameters << std::endl; - return ERROR_LRC_K_MODULO; - } - - if (m % local_group_count) { - *ss << "m must be a multiple of (k + m) / l in " - << parameters << std::endl; - return ERROR_LRC_M_MODULO; - } - - string mapping; - for (int i = 0; i < local_group_count; i++) { - mapping += string(k / local_group_count, 'D') + - string(m / 
local_group_count, '_') + "_"; - } - parameters["mapping"] = mapping; - - string layers = "[ "; - - // global layer - layers += " [ \""; - for (int i = 0; i < local_group_count; i++) { - layers += string(k / local_group_count, 'D') + - string(m / local_group_count, 'c') + "_"; - } - layers += "\", \"\" ],"; - - // local layers - for (int i = 0; i < local_group_count; i++) { - layers += " [ \""; - for (int j = 0; j < local_group_count; j++) { - if (i == j) - layers += string(l, 'D') + "c"; - else - layers += string(l + 1, '_'); - } - layers += "\", \"\" ],"; - } - parameters["layers"] = layers + "]"; - - map::const_iterator parameter; - string ruleset_locality; - parameter = parameters.find("ruleset-locality"); - if (parameter != parameters.end()) - ruleset_locality = parameter->second; - string ruleset_failure_domain = "host"; - parameter = parameters.find("ruleset-failure-domain"); - if (parameter != parameters.end()) - ruleset_failure_domain = parameter->second; - - if (ruleset_locality != "") { - ruleset_steps.clear(); - ruleset_steps.push_back(Step("choose", ruleset_locality, - local_group_count)); - ruleset_steps.push_back(Step("chooseleaf", ruleset_failure_domain, - l + 1)); - } else if (ruleset_failure_domain != "") { - ruleset_steps.clear(); - ruleset_steps.push_back(Step("chooseleaf", ruleset_failure_domain, 0)); - } - - return 0; -} - -int ErasureCodeLRC::parse_ruleset(const map ¶meters, - ostream *ss) -{ - map::const_iterator parameter; - parameter = parameters.find("ruleset-root"); - if (parameter != parameters.end()) - ruleset_root = parameter->second; - - if (parameters.count("ruleset-steps") != 0) { - ruleset_steps.clear(); - string str = parameters.find("ruleset-steps")->second; - json_spirit::mArray description; - try { - json_spirit::mValue json; - json_spirit::read_or_throw(str, json); - - if (json.type() != json_spirit::array_type) { - *ss << "ruleset-steps='" << str - << "' must be a JSON array but is of type " - << json.type() << " instead" << 
std::endl; - return ERROR_LRC_ARRAY; - } - description = json.get_array(); - } catch (json_spirit::Error_position &e) { - *ss << "failed to parse ruleset-steps='" << str << "'" - << " at line " << e.line_ << ", column " << e.column_ - << " : " << e.reason_ << std::endl; - return ERROR_LRC_PARSE_JSON; - } - - int position = 0; - for (vector::iterator i = description.begin(); - i != description.end(); - i++, position++) { - if (i->type() != json_spirit::array_type) { - stringstream json_string; - json_spirit::write(*i, json_string); - *ss << "element of the array " - << str << " must be a JSON array but " - << json_string.str() << " at position " << position - << " is of type " << i->type() << " instead" << std::endl; - return ERROR_LRC_ARRAY; - } - int r = parse_ruleset_step(str, i->get_array(), ss); - if (r) - return r; - } - } - return 0; -} - -int ErasureCodeLRC::parse_ruleset_step(string description_string, - json_spirit::mArray description, - ostream *ss) -{ - stringstream json_string; - json_spirit::write(description, json_string); - string op; - string type; - int n; - int position = 0; - for (vector::iterator i = description.begin(); - i != description.end(); - i++, position++) { - if ((position == 0 || position == 1) && - i->type() != json_spirit::str_type) { - *ss << "element " << position << " of the array " - << json_string.str() << " found in " << description_string - << " must be a JSON string but is of type " - << i->type() << " instead" << std::endl; - return position == 0 ? 
ERROR_LRC_RULESET_OP : ERROR_LRC_RULESET_TYPE; - } - if (position == 2 && i->type() != json_spirit::int_type) { - *ss << "element " << position << " of the array " - << json_string.str() << " found in " << description_string - << " must be a JSON int but is of type " - << i->type() << " instead" << std::endl; - return ERROR_LRC_RULESET_N; - } - - if (position == 0) - op = i->get_str(); - if (position == 1) - type = i->get_str(); - if (position == 2) - n = i->get_int(); - } - ruleset_steps.push_back(Step(op, type, n)); - return 0; -} - -int ErasureCodeLRC::init(const map ¶meters, - ostream *ss) -{ - int r; - - map parameters_rw = parameters; - r = parse_kml(parameters_rw, ss); - if (r) - return r; - - r = parse(parameters_rw, ss); - if (r) - return r; - - json_spirit::mArray description; - r = layers_description(parameters_rw, &description, ss); - if (r) - return r; - - string description_string = parameters_rw.find("layers")->second; - - dout(10) << "init(" << description_string << ")" << dendl; - - r = layers_parse(description_string, description, ss); - if (r) - return r; - - r = layers_init(); - if (r) - return r; - - if (parameters_rw.count("mapping") == 0) { - *ss << "the 'mapping' parameter is missing from " << parameters_rw; - return ERROR_LRC_MAPPING; - } - string mapping = parameters_rw.find("mapping")->second; - data_chunk_count = 0; - for(std::string::iterator it = mapping.begin(); it != mapping.end(); ++it) { - if (*it == 'D') - data_chunk_count++; - } - chunk_count = mapping.length(); - - return layers_sanity_checks(description_string, ss); -} - -set ErasureCodeLRC::get_erasures(const set &want, - const set &available) const -{ - set result; - set_difference(want.begin(), want.end(), - available.begin(), available.end(), - inserter(result, result.end())); - return result; -} - -unsigned int ErasureCodeLRC::get_chunk_size(unsigned int object_size) const -{ - return layers.front().erasure_code->get_chunk_size(object_size); -} - -void p(const set &s) { 
cerr << s; } // for gdb - -int ErasureCodeLRC::minimum_to_decode(const set &want_to_read, - const set &available_chunks, - set *minimum) -{ - dout(20) << __func__ << " want_to_read " << want_to_read - << " available_chunks " << available_chunks << dendl; - { - set erasures_total; - set erasures_not_recovered; - set erasures_want; - for (unsigned int i = 0; i < get_chunk_count(); ++i) { - if (available_chunks.count(i) == 0) { - erasures_total.insert(i); - erasures_not_recovered.insert(i); - if (want_to_read.count(i) != 0) - erasures_want.insert(i); - } - } - - // - // Case 1: - // - // When no chunk is missing there is no need to read more than what - // is wanted. - // - if (erasures_want.empty()) { - *minimum = want_to_read; - dout(20) << __func__ << " minimum == want_to_read == " - << want_to_read << dendl; - return 0; - } - - // - // Case 2: - // - // Try to recover erasures with as few chunks as possible. - // - for (vector::reverse_iterator i = layers.rbegin(); - i != layers.rend(); - ++i) { - // - // If this layer has no chunk that we want, skip it. - // - set layer_want; - set_intersection(want_to_read.begin(), want_to_read.end(), - i->chunks_as_set.begin(), i->chunks_as_set.end(), - inserter(layer_want, layer_want.end())); - if (layer_want.empty()) - continue; - // - // Are some of the chunks we want missing ? - // - set layer_erasures; - set_intersection(layer_want.begin(), layer_want.end(), - erasures_want.begin(), erasures_want.end(), - inserter(layer_erasures, layer_erasures.end())); - set layer_minimum; - if (layer_erasures.empty()) { - // - // The chunks we want are available, this is the minimum we need - // to read. 
- // - layer_minimum = layer_want; - } else { - set erasures; - set_intersection(i->chunks_as_set.begin(), i->chunks_as_set.end(), - erasures_not_recovered.begin(), erasures_not_recovered.end(), - inserter(erasures, erasures.end())); - - if (erasures.size() > i->erasure_code->get_coding_chunk_count()) { - // - // There are too many erasures for this layer to recover: skip - // it and hope that an upper layer will be do better. - // - continue; - } else { - // - // Get all available chunks in that layer to recover the - // missing one(s). - // - set_difference(i->chunks_as_set.begin(), i->chunks_as_set.end(), - erasures_not_recovered.begin(), erasures_not_recovered.end(), - inserter(layer_minimum, layer_minimum.end())); - // - // Chunks recovered by this layer are removed from the list of - // erasures so that upper levels do not attempt to recover - // them. - // - for (set::const_iterator j = erasures.begin(); - j != erasures.end(); - j++) { - erasures_not_recovered.erase(*j); - if (erasures_want.count(*j)) - erasures_want.erase(*j); - } - } - } - minimum->insert(layer_minimum.begin(), layer_minimum.end()); - } - if (erasures_want.empty()) { - minimum->insert(want_to_read.begin(), want_to_read.end()); - for (set::const_iterator i = erasures_total.begin(); - i != erasures_total.end(); - i++) { - if (minimum->count(*i)) - minimum->erase(*i); - } - dout(20) << __func__ << " minimum = " << *minimum << dendl; - return 0; - } - } - - { - // - // Case 3: - // - // The previous strategy failed to recover from all erasures. - // - // Try to recover as many chunks as possible, even from layers - // that do not contain chunks that we want, in the hope that it - // will help the upper layers. 
- // - set erasures_total; - for (unsigned int i = 0; i < get_chunk_count(); ++i) { - if (available_chunks.count(i) == 0) - erasures_total.insert(i); - } - - for (vector::reverse_iterator i = layers.rbegin(); - i != layers.rend(); - ++i) { - set layer_erasures; - set_intersection(i->chunks_as_set.begin(), i->chunks_as_set.end(), - erasures_total.begin(), erasures_total.end(), - inserter(layer_erasures, layer_erasures.end())); - // - // If this layer has no erasure, skip it - // - if (layer_erasures.empty()) - continue; - - if (layer_erasures.size() > 0 && - layer_erasures.size() <= i->erasure_code->get_coding_chunk_count()) { - // - // chunks recovered by this layer are removed from the list of - // erasures so that upper levels know they can rely on their - // availability - // - for (set::const_iterator j = layer_erasures.begin(); - j != layer_erasures.end(); - j++) { - erasures_total.erase(*j); - } - } - } - if (erasures_total.empty()) { - // - // Do not try to be smart about what chunks are necessary to - // recover, use all available chunks. 
- // - *minimum = available_chunks; - dout(20) << __func__ << " minimum == available_chunks == " - << available_chunks << dendl; - return 0; - } - } - - derr << __func__ << " not enough chunks in " << available_chunks - << " to read " << want_to_read << dendl; - return -EIO; -} - -int ErasureCodeLRC::encode_chunks(const set &want_to_encode, - map *encoded) -{ - unsigned int top = layers.size(); - for (vector::reverse_iterator i = layers.rbegin(); - i != layers.rend(); - ++i) { - --top; - if (includes(i->chunks_as_set.begin(), i->chunks_as_set.end(), - want_to_encode.begin(), want_to_encode.end())) - break; - } - - for (unsigned int i = top; i < layers.size(); ++i) { - const Layer &layer = layers[i]; - set layer_want_to_encode; - map layer_encoded; - int j = 0; - for (vector::const_iterator c = layer.chunks.begin(); - c != layer.chunks.end(); - c++) { - layer_encoded[j] = (*encoded)[*c]; - if (want_to_encode.find(*c) != want_to_encode.end()) - layer_want_to_encode.insert(j); - j++; - } - int err = layer.erasure_code->encode_chunks(layer_want_to_encode, - &layer_encoded); - if (err) { - derr << __func__ << " layer " << layer.chunks_map - << " failed with " << err << " trying to encode " - << layer_want_to_encode << dendl; - return err; - } - } - return 0; -} - -int ErasureCodeLRC::decode_chunks(const set &want_to_read, - const map &chunks, - map *decoded) -{ - set available_chunks; - set erasures; - for (unsigned int i = 0; i < get_chunk_count(); ++i) { - if (chunks.count(i) != 0) - available_chunks.insert(i); - else - erasures.insert(i); - } - - set want_to_read_erasures; - - for (vector::reverse_iterator layer = layers.rbegin(); - layer != layers.rend(); - ++layer) { - set layer_erasures; - set_intersection(layer->chunks_as_set.begin(), layer->chunks_as_set.end(), - erasures.begin(), erasures.end(), - inserter(layer_erasures, layer_erasures.end())); - - if (layer_erasures.size() > - layer->erasure_code->get_coding_chunk_count()) { - // skip because there are too 
many erasures for this layer to recover - } else if(layer_erasures.size() == 0) { - // skip because all chunks are already available - } else { - set layer_want_to_read; - map layer_chunks; - map layer_decoded; - int j = 0; - for (vector::const_iterator c = layer->chunks.begin(); - c != layer->chunks.end(); - c++) { - // - // Pick chunks from *decoded* instead of *chunks* to re-use - // chunks recovered by previous layers. In other words - // *chunks* does not change but *decoded* gradually improves - // as more layers recover from erasures. - // - if (erasures.count(*c) == 0) - layer_chunks[j] = (*decoded)[*c]; - if (want_to_read.count(*c) != 0) - layer_want_to_read.insert(j); - layer_decoded[j] = (*decoded)[*c]; - ++j; - } - int err = layer->erasure_code->decode_chunks(layer_want_to_read, - layer_chunks, - &layer_decoded); - if (err) { - derr << __func__ << " layer " << layer->chunks_map - << " failed with " << err << " trying to decode " - << layer_want_to_read << " with " << available_chunks << dendl; - return err; - } - j = 0; - for (vector::const_iterator c = layer->chunks.begin(); - c != layer->chunks.end(); - c++) { - (*decoded)[*c] = layer_decoded[j]; - ++j; - if (erasures.count(*c) != 0) - erasures.erase(*c); - } - want_to_read_erasures.clear(); - set_intersection(erasures.begin(), erasures.end(), - want_to_read.begin(), want_to_read.end(), - inserter(want_to_read_erasures, want_to_read_erasures.end())); - if (want_to_read_erasures.size() == 0) - break; - } - } - - if (want_to_read_erasures.size() > 0) { - derr << __func__ << " want to read " << want_to_read - << " with available_chunks = " << available_chunks - << " end up being unable to read " << want_to_read_erasures << dendl; - return -EIO; - } else { - return 0; - } -} diff --git a/src/erasure-code/LRC/ErasureCodeLRC.h b/src/erasure-code/LRC/ErasureCodeLRC.h deleted file mode 100644 index 3ad592cce983..000000000000 --- a/src/erasure-code/LRC/ErasureCodeLRC.h +++ /dev/null @@ -1,140 +0,0 @@ -// -*- 
mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2014 Cloudwatt - * - * Author: Loic Dachary - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - */ - -#ifndef CEPH_ERASURE_CODE_LRC_H -#define CEPH_ERASURE_CODE_LRC_H - -#include "include/err.h" -#include "json_spirit/json_spirit.h" -#include "erasure-code/ErasureCode.h" - -#define ERROR_LRC_ARRAY -(MAX_ERRNO + 1) -#define ERROR_LRC_OBJECT -(MAX_ERRNO + 2) -#define ERROR_LRC_INT -(MAX_ERRNO + 3) -#define ERROR_LRC_STR -(MAX_ERRNO + 4) -#define ERROR_LRC_PLUGIN -(MAX_ERRNO + 5) -#define ERROR_LRC_DESCRIPTION -(MAX_ERRNO + 6) -#define ERROR_LRC_PARSE_JSON -(MAX_ERRNO + 7) -#define ERROR_LRC_MAPPING -(MAX_ERRNO + 8) -#define ERROR_LRC_MAPPING_SIZE -(MAX_ERRNO + 9) -#define ERROR_LRC_FIRST_MAPPING -(MAX_ERRNO + 10) -#define ERROR_LRC_COUNT_CONSTRAINT -(MAX_ERRNO + 11) -#define ERROR_LRC_CONFIG_OPTIONS -(MAX_ERRNO + 12) -#define ERROR_LRC_LAYERS_COUNT -(MAX_ERRNO + 13) -#define ERROR_LRC_RULESET_OP -(MAX_ERRNO + 14) -#define ERROR_LRC_RULESET_TYPE -(MAX_ERRNO + 15) -#define ERROR_LRC_RULESET_N -(MAX_ERRNO + 16) -#define ERROR_LRC_ALL_OR_NOTHING -(MAX_ERRNO + 17) -#define ERROR_LRC_GENERATED -(MAX_ERRNO + 18) -#define ERROR_LRC_K_M_MODULO -(MAX_ERRNO + 19) -#define ERROR_LRC_K_MODULO -(MAX_ERRNO + 20) -#define ERROR_LRC_M_MODULO -(MAX_ERRNO + 21) - -class ErasureCodeLRC : public ErasureCode { -public: - struct Layer { - Layer(string _chunks_map) : chunks_map(_chunks_map) { } - ErasureCodeInterfaceRef erasure_code; - vector data; - vector coding; - vector chunks; - set chunks_as_set; - string chunks_map; - map parameters; - }; - vector layers; - string directory; - unsigned int chunk_count; - 
unsigned int data_chunk_count; - string ruleset_root; - struct Step { - Step(string _op, string _type, int _n) : - op(_op), - type(_type), - n(_n) {} - string op; - string type; - int n; - }; - vector ruleset_steps; - - ErasureCodeLRC() : - ruleset_root("default") - { - ruleset_steps.push_back(Step("chooseleaf", "host", 0)); - } - - virtual ~ErasureCodeLRC() {} - - set get_erasures(const set &need, - const set &available) const; - - virtual int minimum_to_decode(const set &want_to_read, - const set &available, - set *minimum); - - int layer_minimum_to_decode(const Layer &layer, - const set &want, - const set &available, - set *minimum) const; - - virtual int create_ruleset(const string &name, - CrushWrapper &crush, - ostream *ss) const; - - virtual unsigned int get_chunk_count() const { - return chunk_count; - } - - virtual unsigned int get_data_chunk_count() const { - return data_chunk_count; - } - - virtual unsigned int get_chunk_size(unsigned int object_size) const; - - int layer_encode(const Layer &layer, vector &chunks); - - virtual int encode_chunks(const set &want_to_encode, - map *encoded); - - virtual int decode_chunks(const set &want_to_read, - const map &chunks, - map *decoded); - - int init(const map ¶meters, ostream *ss); - - virtual int parse(const map ¶meters, ostream *ss); - - int parse_kml(map ¶meters, ostream *ss); - - int parse_ruleset(const map ¶meters, ostream *ss); - - int parse_ruleset_step(string description_string, - json_spirit::mArray description, - ostream *ss); - - int layers_description(const map ¶meters, - json_spirit::mArray *description, - ostream *ss) const; - int layers_parse(string description_string, - json_spirit::mArray description, - ostream *ss); - int layers_init(); - int layers_sanity_checks(string description_string, - ostream *ss) const; -}; - -#endif diff --git a/src/erasure-code/LRC/ErasureCodePluginLRC.cc b/src/erasure-code/LRC/ErasureCodePluginLRC.cc deleted file mode 100644 index ce4ae7c9d497..000000000000 --- 
a/src/erasure-code/LRC/ErasureCodePluginLRC.cc +++ /dev/null @@ -1,59 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2014 Cloudwatt - * - * Author: Loic Dachary - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - */ - -#include "ceph_ver.h" -#include "common/debug.h" -#include "erasure-code/ErasureCodePlugin.h" -#include "ErasureCodeLRC.h" - -// re-include our assert -#include "include/assert.h" - -#define dout_subsys ceph_subsys_osd -#undef dout_prefix -#define dout_prefix _prefix(_dout) - -static ostream& _prefix(std::ostream* _dout) -{ - return *_dout << "ErasureCodePluginLRC: "; -} - -class ErasureCodePluginLRC : public ErasureCodePlugin { -public: - virtual int factory(const map ¶meters, - ErasureCodeInterfaceRef *erasure_code) { - ErasureCodeLRC *interface; - interface = new ErasureCodeLRC(); - stringstream ss; - assert(parameters.count("directory") != 0); - int r = interface->init(parameters, &ss); - if (r) { - derr << ss.str() << dendl; - delete interface; - return r; - } - *erasure_code = ErasureCodeInterfaceRef(interface); - return 0; - } -}; - -const char *__erasure_code_version() { return CEPH_GIT_NICE_VER; } - -int __erasure_code_init(char *plugin_name, char *directory) -{ - ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance(); - return instance.add(plugin_name, new ErasureCodePluginLRC()); -} diff --git a/src/erasure-code/LRC/Makefile.am b/src/erasure-code/LRC/Makefile.am deleted file mode 100644 index a1a9e23a3e64..000000000000 --- a/src/erasure-code/LRC/Makefile.am +++ /dev/null @@ -1,21 +0,0 @@ -# LRC plugin -noinst_HEADERS += \ - erasure-code/LRC/ErasureCodeLRC.h - -LRC_sources 
= \ - erasure-code/ErasureCode.cc \ - erasure-code/LRC/ErasureCodePluginLRC.cc \ - erasure-code/LRC/ErasureCodeLRC.cc - -erasure-code/LRC/ErasureCodePluginLRC.cc: ./ceph_ver.h - -libec_LRC_la_SOURCES = ${LRC_sources} common/str_map.cc -libec_LRC_la_CFLAGS = ${AM_CFLAGS} -libec_LRC_la_CXXFLAGS= ${AM_CXXFLAGS} -libec_LRC_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(LIBJSON_SPIRIT) -libec_LRC_la_LDFLAGS = ${AM_LDFLAGS} -version-info 1:0:0 -if LINUX -libec_LRC_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*' -endif - -erasure_codelib_LTLIBRARIES += libec_LRC.la diff --git a/src/erasure-code/Makefile.am b/src/erasure-code/Makefile.am index 223d16634aec..dc1db2113dd1 100644 --- a/src/erasure-code/Makefile.am +++ b/src/erasure-code/Makefile.am @@ -4,7 +4,7 @@ erasure_codelibdir = $(pkglibdir)/erasure-code erasure_codelib_LTLIBRARIES = include erasure-code/jerasure/Makefile.am -include erasure-code/LRC/Makefile.am +include erasure-code/lrc/Makefile.am if WITH_BETTER_YASM_ELF64 include erasure-code/isa/Makefile.am diff --git a/src/erasure-code/lrc/ErasureCodeLrc.cc b/src/erasure-code/lrc/ErasureCodeLrc.cc new file mode 100644 index 000000000000..caa78960504e --- /dev/null +++ b/src/erasure-code/lrc/ErasureCodeLrc.cc @@ -0,0 +1,835 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Cloudwatt + * Copyright (C) 2014 Red Hat + * + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include + +#include "include/str_map.h" +#include "common/debug.h" +#include "crush/CrushWrapper.h" +#include "osd/osd_types.h" +#include "include/stringify.h" +#include "erasure-code/ErasureCodePlugin.h" +#include "json_spirit/json_spirit_writer.h" + +#include "ErasureCodeLrc.h" + +// re-include our assert to clobber boost's +#include "include/assert.h" + +#define dout_subsys ceph_subsys_osd +#undef dout_prefix +#define dout_prefix _prefix(_dout) + +static ostream& _prefix(std::ostream* _dout) +{ + return *_dout << "ErasureCodeLrc: "; +} + +int ErasureCodeLrc::create_ruleset(const string &name, + CrushWrapper &crush, + ostream *ss) const +{ + if (crush.rule_exists(name)) { + *ss << "rule " << name << " exists"; + return -EEXIST; + } + if (!crush.name_exists(ruleset_root)) { + *ss << "root item " << ruleset_root << " does not exist"; + return -ENOENT; + } + int root = crush.get_item_id(ruleset_root); + + int ruleset = 0; + for (int i = 0; i < crush.get_max_rules(); i++) { + if (crush.rule_exists(i) && + crush.get_rule_mask_ruleset(i) >= ruleset) { + ruleset = crush.get_rule_mask_ruleset(i) + 1; + } + } + + int steps = 3 + ruleset_steps.size(); + int min_rep = 3; + int max_rep = 30; + crush_rule *rule = crush_make_rule(steps, ruleset, + pg_pool_t::TYPE_ERASURE, + min_rep, max_rep); + assert(rule); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0); + // [ [ "choose", "rack", 2 ], + // [ "chooseleaf", "host", 5 ] ] + for (vector::const_iterator i = ruleset_steps.begin(); + i != ruleset_steps.end(); + ++i) { + int op = i->op == "chooseleaf" ? 
+ CRUSH_RULE_CHOOSELEAF_INDEP : CRUSH_RULE_CHOOSE_INDEP; + int type = crush.get_type_id(i->type); + if (type < 0) { + *ss << "unknown crush type " << i->type; + return -EINVAL; + } + crush_rule_set_step(rule, step++, op, i->n, type); + } + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + int rno = crush_add_rule(crush.crush, rule, -1); + crush.set_rule_name(rno, name); + + return ruleset; +} + +int ErasureCodeLrc::layers_description(const map ¶meters, + json_spirit::mArray *description, + ostream *ss) const +{ + if (parameters.count("layers") == 0) { + *ss << "could not find 'layers' in " << parameters << std::endl; + return ERROR_LRC_DESCRIPTION; + } + string str = parameters.find("layers")->second; + try { + json_spirit::mValue json; + json_spirit::read_or_throw(str, json); + + if (json.type() != json_spirit::array_type) { + *ss << "layers='" << str + << "' must be a JSON array but is of type " + << json.type() << " instead" << std::endl; + return ERROR_LRC_ARRAY; + } + *description = json.get_array(); + } catch (json_spirit::Error_position &e) { + *ss << "failed to parse layers='" << str << "'" + << " at line " << e.line_ << ", column " << e.column_ + << " : " << e.reason_ << std::endl; + return ERROR_LRC_PARSE_JSON; + } + return 0; +} + +int ErasureCodeLrc::layers_parse(string description_string, + json_spirit::mArray description, + ostream *ss) +{ + int position = 0; + for (vector::iterator i = description.begin(); + i != description.end(); + i++, position++) { + if (i->type() != json_spirit::array_type) { + stringstream json_string; + json_spirit::write(*i, json_string); + *ss << "each element of the array " + << description_string << " must be a JSON array but " + << json_string.str() << " at position " << position + << " is of type " << i->type() << " instead" << std::endl; + return ERROR_LRC_ARRAY; + } + json_spirit::mArray layer_json = i->get_array(); + map parameters; + int index = 0; + for (vector::iterator j = layer_json.begin(); + j != 
layer_json.end(); + ++j, ++index) { + if (index == 0) { + if (j->type() != json_spirit::str_type) { + stringstream element; + json_spirit::write(*j, element); + *ss << "the first element of the entry " + << element.str() << " (first is zero) " + << position << " in " << description_string + << " is of type " << (*j).type() << " instead of string" << std::endl; + return ERROR_LRC_STR; + } + layers.push_back(Layer(j->get_str())); + Layer &layer = layers.back(); + layer.chunks_map = j->get_str(); + } else if(index == 1) { + Layer &layer = layers.back(); + if (j->type() != json_spirit::str_type && + j->type() != json_spirit::obj_type) { + stringstream element; + json_spirit::write(*j, element); + *ss << "the second element of the entry " + << element.str() << " (first is zero) " + << position << " in " << description_string + << " is of type " << (*j).type() << " instead of string or object" + << std::endl; + return ERROR_LRC_CONFIG_OPTIONS; + } + if (j->type() == json_spirit::str_type) { + int err = get_json_str_map(j->get_str(), *ss, &layer.parameters); + if (err) + return err; + } else if (j->type() == json_spirit::obj_type) { + json_spirit::mObject o = j->get_obj(); + + for (map::iterator i = o.begin(); + i != o.end(); + ++i) { + layer.parameters[i->first] = i->second.get_str(); + } + } + } else { + // ignore trailing elements + } + } + } + return 0; +} + +int ErasureCodeLrc::layers_init() +{ + ErasureCodePluginRegistry ®istry = ErasureCodePluginRegistry::instance(); + int err; + for (unsigned int i = 0; i < layers.size(); i++) { + Layer &layer = layers[i]; + int position = 0; + for(std::string::iterator it = layer.chunks_map.begin(); + it != layer.chunks_map.end(); + ++it) { + if (*it == 'D') + layer.data.push_back(position); + if (*it == 'c') + layer.coding.push_back(position); + if (*it == 'c' || *it == 'D') + layer.chunks_as_set.insert(position); + position++; + } + layer.chunks = layer.data; + layer.chunks.insert(layer.chunks.end(), + layer.coding.begin(), 
layer.coding.end()); + if (layer.parameters.find("k") == layer.parameters.end()) + layer.parameters["k"] = stringify(layer.data.size()); + if (layer.parameters.find("m") == layer.parameters.end()) + layer.parameters["m"] = stringify(layer.coding.size()); + if (layer.parameters.find("plugin") == layer.parameters.end()) + layer.parameters["plugin"] = "jerasure"; + if (layer.parameters.find("technique") == layer.parameters.end()) + layer.parameters["technique"] = "reed_sol_van"; + if (layer.parameters.find("directory") == layer.parameters.end()) + layer.parameters["directory"] = directory; + stringstream ss; + err = registry.factory(layer.parameters["plugin"], + layer.parameters, + &layer.erasure_code, + ss); + if (err) { + derr << ss.str() << dendl; + return err; + } + } + return 0; +} + +int ErasureCodeLrc::layers_sanity_checks(string description_string, + ostream *ss) const +{ + int position = 0; + + if (layers.size() < 1) { + *ss << "layers parameter has " << layers.size() + << " which is less than the minimum of one. " + << description_string << std::endl; + return ERROR_LRC_LAYERS_COUNT; + } + for (vector::const_iterator layer = layers.begin(); + layer != layers.end(); + ++layer) { + if (chunk_count != layer->chunks_map.length()) { + *ss << "the first element of the array at position " + << position << " (starting from zero) " + << " is the string '" << layer->chunks_map + << " found in the layers parameter " + << description_string << ". 
It is expected to be " + << chunk_count << " characters long but is " + << layer->chunks_map.length() << " characters long instead " + << std::endl; + return ERROR_LRC_MAPPING_SIZE; + } + } + return 0; +} + +int ErasureCodeLrc::parse(const map ¶meters, + ostream *ss) +{ + int r = ErasureCode::parse(parameters, ss); + if (r) + return r; + + if (parameters.count("directory") != 0) + directory = parameters.find("directory")->second; + + return parse_ruleset(parameters, ss); +} + +int ErasureCodeLrc::parse_kml(map ¶meters, + ostream *ss) +{ + int err = ErasureCode::parse(parameters, ss); + const int DEFAULT = -1; + int k, m, l; + err |= to_int("k", parameters, &k, DEFAULT, ss); + err |= to_int("m", parameters, &m, DEFAULT, ss); + err |= to_int("l", parameters, &l, DEFAULT, ss); + + if (k == DEFAULT && m == DEFAULT && l == DEFAULT) + return 0; + + if ((k != DEFAULT || m != DEFAULT || l != DEFAULT) && + (k == DEFAULT || m == DEFAULT || l == DEFAULT)) { + *ss << "All of k, m, l must be set or none of them in " + << parameters << std::endl; + return ERROR_LRC_ALL_OR_NOTHING; + } + + const char *generated[] = { "mapping", + "layers", + "ruleset-steps" }; + + for (int i = 0; i < 3; i++) { + if (parameters.count(generated[i])) { + *ss << "The " << generated[i] << " parameter cannot be set " + << "when k, m, l are set in " << parameters << std::endl; + return ERROR_LRC_GENERATED; + } + } + + if ((k + m) % l) { + *ss << "k + m must be a multiple of l in " + << parameters << std::endl; + return ERROR_LRC_K_M_MODULO; + } + + int local_group_count = (k + m) / l; + + if (k % local_group_count) { + *ss << "k must be a multiple of (k + m) / l in " + << parameters << std::endl; + return ERROR_LRC_K_MODULO; + } + + if (m % local_group_count) { + *ss << "m must be a multiple of (k + m) / l in " + << parameters << std::endl; + return ERROR_LRC_M_MODULO; + } + + string mapping; + for (int i = 0; i < local_group_count; i++) { + mapping += string(k / local_group_count, 'D') + + string(m / 
local_group_count, '_') + "_"; + } + parameters["mapping"] = mapping; + + string layers = "[ "; + + // global layer + layers += " [ \""; + for (int i = 0; i < local_group_count; i++) { + layers += string(k / local_group_count, 'D') + + string(m / local_group_count, 'c') + "_"; + } + layers += "\", \"\" ],"; + + // local layers + for (int i = 0; i < local_group_count; i++) { + layers += " [ \""; + for (int j = 0; j < local_group_count; j++) { + if (i == j) + layers += string(l, 'D') + "c"; + else + layers += string(l + 1, '_'); + } + layers += "\", \"\" ],"; + } + parameters["layers"] = layers + "]"; + + map::const_iterator parameter; + string ruleset_locality; + parameter = parameters.find("ruleset-locality"); + if (parameter != parameters.end()) + ruleset_locality = parameter->second; + string ruleset_failure_domain = "host"; + parameter = parameters.find("ruleset-failure-domain"); + if (parameter != parameters.end()) + ruleset_failure_domain = parameter->second; + + if (ruleset_locality != "") { + ruleset_steps.clear(); + ruleset_steps.push_back(Step("choose", ruleset_locality, + local_group_count)); + ruleset_steps.push_back(Step("chooseleaf", ruleset_failure_domain, + l + 1)); + } else if (ruleset_failure_domain != "") { + ruleset_steps.clear(); + ruleset_steps.push_back(Step("chooseleaf", ruleset_failure_domain, 0)); + } + + return 0; +} + +int ErasureCodeLrc::parse_ruleset(const map ¶meters, + ostream *ss) +{ + map::const_iterator parameter; + parameter = parameters.find("ruleset-root"); + if (parameter != parameters.end()) + ruleset_root = parameter->second; + + if (parameters.count("ruleset-steps") != 0) { + ruleset_steps.clear(); + string str = parameters.find("ruleset-steps")->second; + json_spirit::mArray description; + try { + json_spirit::mValue json; + json_spirit::read_or_throw(str, json); + + if (json.type() != json_spirit::array_type) { + *ss << "ruleset-steps='" << str + << "' must be a JSON array but is of type " + << json.type() << " instead" << 
std::endl; + return ERROR_LRC_ARRAY; + } + description = json.get_array(); + } catch (json_spirit::Error_position &e) { + *ss << "failed to parse ruleset-steps='" << str << "'" + << " at line " << e.line_ << ", column " << e.column_ + << " : " << e.reason_ << std::endl; + return ERROR_LRC_PARSE_JSON; + } + + int position = 0; + for (vector::iterator i = description.begin(); + i != description.end(); + i++, position++) { + if (i->type() != json_spirit::array_type) { + stringstream json_string; + json_spirit::write(*i, json_string); + *ss << "element of the array " + << str << " must be a JSON array but " + << json_string.str() << " at position " << position + << " is of type " << i->type() << " instead" << std::endl; + return ERROR_LRC_ARRAY; + } + int r = parse_ruleset_step(str, i->get_array(), ss); + if (r) + return r; + } + } + return 0; +} + +int ErasureCodeLrc::parse_ruleset_step(string description_string, + json_spirit::mArray description, + ostream *ss) +{ + stringstream json_string; + json_spirit::write(description, json_string); + string op; + string type; + int n; + int position = 0; + for (vector::iterator i = description.begin(); + i != description.end(); + i++, position++) { + if ((position == 0 || position == 1) && + i->type() != json_spirit::str_type) { + *ss << "element " << position << " of the array " + << json_string.str() << " found in " << description_string + << " must be a JSON string but is of type " + << i->type() << " instead" << std::endl; + return position == 0 ? 
ERROR_LRC_RULESET_OP : ERROR_LRC_RULESET_TYPE; + } + if (position == 2 && i->type() != json_spirit::int_type) { + *ss << "element " << position << " of the array " + << json_string.str() << " found in " << description_string + << " must be a JSON int but is of type " + << i->type() << " instead" << std::endl; + return ERROR_LRC_RULESET_N; + } + + if (position == 0) + op = i->get_str(); + if (position == 1) + type = i->get_str(); + if (position == 2) + n = i->get_int(); + } + ruleset_steps.push_back(Step(op, type, n)); + return 0; +} + +int ErasureCodeLrc::init(const map ¶meters, + ostream *ss) +{ + int r; + + map parameters_rw = parameters; + r = parse_kml(parameters_rw, ss); + if (r) + return r; + + r = parse(parameters_rw, ss); + if (r) + return r; + + json_spirit::mArray description; + r = layers_description(parameters_rw, &description, ss); + if (r) + return r; + + string description_string = parameters_rw.find("layers")->second; + + dout(10) << "init(" << description_string << ")" << dendl; + + r = layers_parse(description_string, description, ss); + if (r) + return r; + + r = layers_init(); + if (r) + return r; + + if (parameters_rw.count("mapping") == 0) { + *ss << "the 'mapping' parameter is missing from " << parameters_rw; + return ERROR_LRC_MAPPING; + } + string mapping = parameters_rw.find("mapping")->second; + data_chunk_count = 0; + for(std::string::iterator it = mapping.begin(); it != mapping.end(); ++it) { + if (*it == 'D') + data_chunk_count++; + } + chunk_count = mapping.length(); + + return layers_sanity_checks(description_string, ss); +} + +set ErasureCodeLrc::get_erasures(const set &want, + const set &available) const +{ + set result; + set_difference(want.begin(), want.end(), + available.begin(), available.end(), + inserter(result, result.end())); + return result; +} + +unsigned int ErasureCodeLrc::get_chunk_size(unsigned int object_size) const +{ + return layers.front().erasure_code->get_chunk_size(object_size); +} + +void p(const set &s) { 
cerr << s; } // for gdb + +int ErasureCodeLrc::minimum_to_decode(const set &want_to_read, + const set &available_chunks, + set *minimum) +{ + dout(20) << __func__ << " want_to_read " << want_to_read + << " available_chunks " << available_chunks << dendl; + { + set erasures_total; + set erasures_not_recovered; + set erasures_want; + for (unsigned int i = 0; i < get_chunk_count(); ++i) { + if (available_chunks.count(i) == 0) { + erasures_total.insert(i); + erasures_not_recovered.insert(i); + if (want_to_read.count(i) != 0) + erasures_want.insert(i); + } + } + + // + // Case 1: + // + // When no chunk is missing there is no need to read more than what + // is wanted. + // + if (erasures_want.empty()) { + *minimum = want_to_read; + dout(20) << __func__ << " minimum == want_to_read == " + << want_to_read << dendl; + return 0; + } + + // + // Case 2: + // + // Try to recover erasures with as few chunks as possible. + // + for (vector::reverse_iterator i = layers.rbegin(); + i != layers.rend(); + ++i) { + // + // If this layer has no chunk that we want, skip it. + // + set layer_want; + set_intersection(want_to_read.begin(), want_to_read.end(), + i->chunks_as_set.begin(), i->chunks_as_set.end(), + inserter(layer_want, layer_want.end())); + if (layer_want.empty()) + continue; + // + // Are some of the chunks we want missing ? + // + set layer_erasures; + set_intersection(layer_want.begin(), layer_want.end(), + erasures_want.begin(), erasures_want.end(), + inserter(layer_erasures, layer_erasures.end())); + set layer_minimum; + if (layer_erasures.empty()) { + // + // The chunks we want are available, this is the minimum we need + // to read. 
+ // + layer_minimum = layer_want; + } else { + set erasures; + set_intersection(i->chunks_as_set.begin(), i->chunks_as_set.end(), + erasures_not_recovered.begin(), erasures_not_recovered.end(), + inserter(erasures, erasures.end())); + + if (erasures.size() > i->erasure_code->get_coding_chunk_count()) { + // + // There are too many erasures for this layer to recover: skip + // it and hope that an upper layer will do better. + // + continue; + } else { + // + // Get all available chunks in that layer to recover the + // missing one(s). + // + set_difference(i->chunks_as_set.begin(), i->chunks_as_set.end(), + erasures_not_recovered.begin(), erasures_not_recovered.end(), + inserter(layer_minimum, layer_minimum.end())); + // + // Chunks recovered by this layer are removed from the list of + // erasures so that upper levels do not attempt to recover + // them. + // + for (set::const_iterator j = erasures.begin(); + j != erasures.end(); + j++) { + erasures_not_recovered.erase(*j); + if (erasures_want.count(*j)) + erasures_want.erase(*j); + } + } + } + minimum->insert(layer_minimum.begin(), layer_minimum.end()); + } + if (erasures_want.empty()) { + minimum->insert(want_to_read.begin(), want_to_read.end()); + for (set::const_iterator i = erasures_total.begin(); + i != erasures_total.end(); + i++) { + if (minimum->count(*i)) + minimum->erase(*i); + } + dout(20) << __func__ << " minimum = " << *minimum << dendl; + return 0; + } + } + + { + // + // Case 3: + // + // The previous strategy failed to recover from all erasures. + // + // Try to recover as many chunks as possible, even from layers + // that do not contain chunks that we want, in the hope that it + // will help the upper layers. 
+ // + set erasures_total; + for (unsigned int i = 0; i < get_chunk_count(); ++i) { + if (available_chunks.count(i) == 0) + erasures_total.insert(i); + } + + for (vector::reverse_iterator i = layers.rbegin(); + i != layers.rend(); + ++i) { + set layer_erasures; + set_intersection(i->chunks_as_set.begin(), i->chunks_as_set.end(), + erasures_total.begin(), erasures_total.end(), + inserter(layer_erasures, layer_erasures.end())); + // + // If this layer has no erasure, skip it + // + if (layer_erasures.empty()) + continue; + + if (layer_erasures.size() > 0 && + layer_erasures.size() <= i->erasure_code->get_coding_chunk_count()) { + // + // chunks recovered by this layer are removed from the list of + // erasures so that upper levels know they can rely on their + // availability + // + for (set::const_iterator j = layer_erasures.begin(); + j != layer_erasures.end(); + j++) { + erasures_total.erase(*j); + } + } + } + if (erasures_total.empty()) { + // + // Do not try to be smart about what chunks are necessary to + // recover, use all available chunks. 
+ // + *minimum = available_chunks; + dout(20) << __func__ << " minimum == available_chunks == " + << available_chunks << dendl; + return 0; + } + } + + derr << __func__ << " not enough chunks in " << available_chunks + << " to read " << want_to_read << dendl; + return -EIO; +} + +int ErasureCodeLrc::encode_chunks(const set &want_to_encode, + map *encoded) +{ + unsigned int top = layers.size(); + for (vector::reverse_iterator i = layers.rbegin(); + i != layers.rend(); + ++i) { + --top; + if (includes(i->chunks_as_set.begin(), i->chunks_as_set.end(), + want_to_encode.begin(), want_to_encode.end())) + break; + } + + for (unsigned int i = top; i < layers.size(); ++i) { + const Layer &layer = layers[i]; + set layer_want_to_encode; + map layer_encoded; + int j = 0; + for (vector::const_iterator c = layer.chunks.begin(); + c != layer.chunks.end(); + c++) { + layer_encoded[j] = (*encoded)[*c]; + if (want_to_encode.find(*c) != want_to_encode.end()) + layer_want_to_encode.insert(j); + j++; + } + int err = layer.erasure_code->encode_chunks(layer_want_to_encode, + &layer_encoded); + if (err) { + derr << __func__ << " layer " << layer.chunks_map + << " failed with " << err << " trying to encode " + << layer_want_to_encode << dendl; + return err; + } + } + return 0; +} + +int ErasureCodeLrc::decode_chunks(const set &want_to_read, + const map &chunks, + map *decoded) +{ + set available_chunks; + set erasures; + for (unsigned int i = 0; i < get_chunk_count(); ++i) { + if (chunks.count(i) != 0) + available_chunks.insert(i); + else + erasures.insert(i); + } + + set want_to_read_erasures; + + for (vector::reverse_iterator layer = layers.rbegin(); + layer != layers.rend(); + ++layer) { + set layer_erasures; + set_intersection(layer->chunks_as_set.begin(), layer->chunks_as_set.end(), + erasures.begin(), erasures.end(), + inserter(layer_erasures, layer_erasures.end())); + + if (layer_erasures.size() > + layer->erasure_code->get_coding_chunk_count()) { + // skip because there are too 
many erasures for this layer to recover + } else if(layer_erasures.size() == 0) { + // skip because all chunks are already available + } else { + set layer_want_to_read; + map layer_chunks; + map layer_decoded; + int j = 0; + for (vector::const_iterator c = layer->chunks.begin(); + c != layer->chunks.end(); + c++) { + // + // Pick chunks from *decoded* instead of *chunks* to re-use + // chunks recovered by previous layers. In other words + // *chunks* does not change but *decoded* gradually improves + // as more layers recover from erasures. + // + if (erasures.count(*c) == 0) + layer_chunks[j] = (*decoded)[*c]; + if (want_to_read.count(*c) != 0) + layer_want_to_read.insert(j); + layer_decoded[j] = (*decoded)[*c]; + ++j; + } + int err = layer->erasure_code->decode_chunks(layer_want_to_read, + layer_chunks, + &layer_decoded); + if (err) { + derr << __func__ << " layer " << layer->chunks_map + << " failed with " << err << " trying to decode " + << layer_want_to_read << " with " << available_chunks << dendl; + return err; + } + j = 0; + for (vector::const_iterator c = layer->chunks.begin(); + c != layer->chunks.end(); + c++) { + (*decoded)[*c] = layer_decoded[j]; + ++j; + if (erasures.count(*c) != 0) + erasures.erase(*c); + } + want_to_read_erasures.clear(); + set_intersection(erasures.begin(), erasures.end(), + want_to_read.begin(), want_to_read.end(), + inserter(want_to_read_erasures, want_to_read_erasures.end())); + if (want_to_read_erasures.size() == 0) + break; + } + } + + if (want_to_read_erasures.size() > 0) { + derr << __func__ << " want to read " << want_to_read + << " with available_chunks = " << available_chunks + << " end up being unable to read " << want_to_read_erasures << dendl; + return -EIO; + } else { + return 0; + } +} diff --git a/src/erasure-code/lrc/ErasureCodeLrc.h b/src/erasure-code/lrc/ErasureCodeLrc.h new file mode 100644 index 000000000000..4214cb63ba61 --- /dev/null +++ b/src/erasure-code/lrc/ErasureCodeLrc.h @@ -0,0 +1,141 @@ +// -*- 
mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Cloudwatt + * Copyright (C) 2014 Red Hat + * + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#ifndef CEPH_ERASURE_CODE_LRC_H +#define CEPH_ERASURE_CODE_LRC_H + +#include "include/err.h" +#include "json_spirit/json_spirit.h" +#include "erasure-code/ErasureCode.h" + +#define ERROR_LRC_ARRAY -(MAX_ERRNO + 1) +#define ERROR_LRC_OBJECT -(MAX_ERRNO + 2) +#define ERROR_LRC_INT -(MAX_ERRNO + 3) +#define ERROR_LRC_STR -(MAX_ERRNO + 4) +#define ERROR_LRC_PLUGIN -(MAX_ERRNO + 5) +#define ERROR_LRC_DESCRIPTION -(MAX_ERRNO + 6) +#define ERROR_LRC_PARSE_JSON -(MAX_ERRNO + 7) +#define ERROR_LRC_MAPPING -(MAX_ERRNO + 8) +#define ERROR_LRC_MAPPING_SIZE -(MAX_ERRNO + 9) +#define ERROR_LRC_FIRST_MAPPING -(MAX_ERRNO + 10) +#define ERROR_LRC_COUNT_CONSTRAINT -(MAX_ERRNO + 11) +#define ERROR_LRC_CONFIG_OPTIONS -(MAX_ERRNO + 12) +#define ERROR_LRC_LAYERS_COUNT -(MAX_ERRNO + 13) +#define ERROR_LRC_RULESET_OP -(MAX_ERRNO + 14) +#define ERROR_LRC_RULESET_TYPE -(MAX_ERRNO + 15) +#define ERROR_LRC_RULESET_N -(MAX_ERRNO + 16) +#define ERROR_LRC_ALL_OR_NOTHING -(MAX_ERRNO + 17) +#define ERROR_LRC_GENERATED -(MAX_ERRNO + 18) +#define ERROR_LRC_K_M_MODULO -(MAX_ERRNO + 19) +#define ERROR_LRC_K_MODULO -(MAX_ERRNO + 20) +#define ERROR_LRC_M_MODULO -(MAX_ERRNO + 21) + +class ErasureCodeLrc : public ErasureCode { +public: + struct Layer { + Layer(string _chunks_map) : chunks_map(_chunks_map) { } + ErasureCodeInterfaceRef erasure_code; + vector data; + vector coding; + vector chunks; + set chunks_as_set; + string chunks_map; + map parameters; + }; + vector layers; + string directory; 
+ unsigned int chunk_count; + unsigned int data_chunk_count; + string ruleset_root; + struct Step { + Step(string _op, string _type, int _n) : + op(_op), + type(_type), + n(_n) {} + string op; + string type; + int n; + }; + vector ruleset_steps; + + ErasureCodeLrc() : + ruleset_root("default") + { + ruleset_steps.push_back(Step("chooseleaf", "host", 0)); + } + + virtual ~ErasureCodeLrc() {} + + set get_erasures(const set &need, + const set &available) const; + + virtual int minimum_to_decode(const set &want_to_read, + const set &available, + set *minimum); + + int layer_minimum_to_decode(const Layer &layer, + const set &want, + const set &available, + set *minimum) const; + + virtual int create_ruleset(const string &name, + CrushWrapper &crush, + ostream *ss) const; + + virtual unsigned int get_chunk_count() const { + return chunk_count; + } + + virtual unsigned int get_data_chunk_count() const { + return data_chunk_count; + } + + virtual unsigned int get_chunk_size(unsigned int object_size) const; + + int layer_encode(const Layer &layer, vector &chunks); + + virtual int encode_chunks(const set &want_to_encode, + map *encoded); + + virtual int decode_chunks(const set &want_to_read, + const map &chunks, + map *decoded); + + int init(const map ¶meters, ostream *ss); + + virtual int parse(const map ¶meters, ostream *ss); + + int parse_kml(map ¶meters, ostream *ss); + + int parse_ruleset(const map ¶meters, ostream *ss); + + int parse_ruleset_step(string description_string, + json_spirit::mArray description, + ostream *ss); + + int layers_description(const map ¶meters, + json_spirit::mArray *description, + ostream *ss) const; + int layers_parse(string description_string, + json_spirit::mArray description, + ostream *ss); + int layers_init(); + int layers_sanity_checks(string description_string, + ostream *ss) const; +}; + +#endif diff --git a/src/erasure-code/lrc/ErasureCodePluginLrc.cc b/src/erasure-code/lrc/ErasureCodePluginLrc.cc new file mode 100644 index 
000000000000..dfb680a0f6a5 --- /dev/null +++ b/src/erasure-code/lrc/ErasureCodePluginLrc.cc @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Cloudwatt + * Copyright (C) 2014 Red Hat + * + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#include "ceph_ver.h" +#include "common/debug.h" +#include "erasure-code/ErasureCodePlugin.h" +#include "ErasureCodeLrc.h" + +// re-include our assert +#include "include/assert.h" + +#define dout_subsys ceph_subsys_osd +#undef dout_prefix +#define dout_prefix _prefix(_dout) + +static ostream& _prefix(std::ostream* _dout) +{ + return *_dout << "ErasureCodePluginLrc: "; +} + +class ErasureCodePluginLrc : public ErasureCodePlugin { +public: + virtual int factory(const map ¶meters, + ErasureCodeInterfaceRef *erasure_code) { + ErasureCodeLrc *interface; + interface = new ErasureCodeLrc(); + stringstream ss; + assert(parameters.count("directory") != 0); + int r = interface->init(parameters, &ss); + if (r) { + derr << ss.str() << dendl; + delete interface; + return r; + } + *erasure_code = ErasureCodeInterfaceRef(interface); + return 0; + } +}; + +const char *__erasure_code_version() { return CEPH_GIT_NICE_VER; } + +int __erasure_code_init(char *plugin_name, char *directory) +{ + ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance(); + return instance.add(plugin_name, new ErasureCodePluginLrc()); +} diff --git a/src/erasure-code/lrc/Makefile.am b/src/erasure-code/lrc/Makefile.am new file mode 100644 index 000000000000..c6547a3ea529 --- /dev/null +++ b/src/erasure-code/lrc/Makefile.am @@ -0,0 +1,21 @@ +# lrc plugin 
+noinst_HEADERS += \ + erasure-code/lrc/ErasureCodeLrc.h + +lrc_sources = \ + erasure-code/ErasureCode.cc \ + erasure-code/lrc/ErasureCodePluginLrc.cc \ + erasure-code/lrc/ErasureCodeLrc.cc + +erasure-code/lrc/ErasureCodePluginLrc.cc: ./ceph_ver.h + +libec_lrc_la_SOURCES = ${lrc_sources} common/str_map.cc +libec_lrc_la_CFLAGS = ${AM_CFLAGS} +libec_lrc_la_CXXFLAGS= ${AM_CXXFLAGS} +libec_lrc_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(LIBJSON_SPIRIT) +libec_lrc_la_LDFLAGS = ${AM_LDFLAGS} -version-info 1:0:0 +if LINUX +libec_lrc_la_LDFLAGS += -export-symbols-regex '.*__erasure_code_.*' +endif + +erasure_codelib_LTLIBRARIES += libec_lrc.la diff --git a/src/test/erasure-code/Makefile.am b/src/test/erasure-code/Makefile.am index d69e80e38e75..6cef35958e71 100644 --- a/src/test/erasure-code/Makefile.am +++ b/src/test/erasure-code/Makefile.am @@ -144,24 +144,24 @@ endif check_PROGRAMS += unittest_erasure_code_plugin_isa endif -unittest_erasure_code_LRC_SOURCES = \ - test/erasure-code/TestErasureCodeLRC.cc \ - ${LRC_sources} -unittest_erasure_code_LRC_CXXFLAGS = $(UNITTEST_CXXFLAGS) -unittest_erasure_code_LRC_LDADD = $(LIBOSD) $(LIBCOMMON) $(UNITTEST_LDADD) $(CEPH_GLOBAL) +unittest_erasure_code_lrc_SOURCES = \ + test/erasure-code/TestErasureCodeLrc.cc \ + ${lrc_sources} +unittest_erasure_code_lrc_CXXFLAGS = $(UNITTEST_CXXFLAGS) +unittest_erasure_code_lrc_LDADD = $(LIBOSD) $(LIBCOMMON) $(UNITTEST_LDADD) $(CEPH_GLOBAL) if LINUX -unittest_erasure_code_LRC_LDADD += -ldl +unittest_erasure_code_lrc_LDADD += -ldl endif -check_PROGRAMS += unittest_erasure_code_LRC +check_PROGRAMS += unittest_erasure_code_lrc -unittest_erasure_code_plugin_LRC_SOURCES = \ - test/erasure-code/TestErasureCodePluginLRC.cc -unittest_erasure_code_plugin_LRC_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} -unittest_erasure_code_plugin_LRC_LDADD = $(LIBOSD) $(LIBCOMMON) $(UNITTEST_LDADD) $(CEPH_GLOBAL) +unittest_erasure_code_plugin_lrc_SOURCES = \ + test/erasure-code/TestErasureCodePluginLrc.cc 
+unittest_erasure_code_plugin_lrc_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} +unittest_erasure_code_plugin_lrc_LDADD = $(LIBOSD) $(LIBCOMMON) $(UNITTEST_LDADD) $(CEPH_GLOBAL) if LINUX -unittest_erasure_code_plugin_LRC_LDADD += -ldl +unittest_erasure_code_plugin_lrc_LDADD += -ldl endif -check_PROGRAMS += unittest_erasure_code_plugin_LRC +check_PROGRAMS += unittest_erasure_code_plugin_lrc unittest_erasure_code_example_SOURCES = \ erasure-code/ErasureCode.cc \ diff --git a/src/test/erasure-code/TestErasureCodeLRC.cc b/src/test/erasure-code/TestErasureCodeLRC.cc deleted file mode 100644 index 6fcd8a064f95..000000000000 --- a/src/test/erasure-code/TestErasureCodeLRC.cc +++ /dev/null @@ -1,935 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph distributed storage system - * - * Copyright (C) 2014 Cloudwatt - * - * Author: Loic Dachary - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - */ - -#include - -#include "crush/CrushWrapper.h" -#include "common/config.h" -#include "include/stringify.h" -#include "global/global_init.h" -#include "erasure-code/LRC/ErasureCodeLRC.h" -#include "common/ceph_argparse.h" -#include "global/global_context.h" -#include "gtest/gtest.h" - -TEST(ErasureCodeLRC, parse_ruleset) -{ - ErasureCodeLRC LRC; - EXPECT_EQ("default", LRC.ruleset_root); - EXPECT_EQ("host", LRC.ruleset_steps.front().type); - - map parameters; - parameters["ruleset-root"] = "other"; - EXPECT_EQ(0, LRC.parse_ruleset(parameters, &cerr)); - EXPECT_EQ("other", LRC.ruleset_root); - - parameters["ruleset-steps"] = "[]"; - EXPECT_EQ(0, LRC.parse_ruleset(parameters, &cerr)); - EXPECT_TRUE(LRC.ruleset_steps.empty()); - - parameters["ruleset-steps"] = "0"; - EXPECT_EQ(ERROR_LRC_ARRAY, LRC.parse_ruleset(parameters, &cerr)); - - parameters["ruleset-steps"] = "{"; - EXPECT_EQ(ERROR_LRC_PARSE_JSON, LRC.parse_ruleset(parameters, &cerr)); - - parameters["ruleset-steps"] = "[0]"; - EXPECT_EQ(ERROR_LRC_ARRAY, LRC.parse_ruleset(parameters, &cerr)); - - parameters["ruleset-steps"] = "[[0]]"; - EXPECT_EQ(ERROR_LRC_RULESET_OP, LRC.parse_ruleset(parameters, &cerr)); - - parameters["ruleset-steps"] = "[[\"choose\", 0]]"; - EXPECT_EQ(ERROR_LRC_RULESET_TYPE, LRC.parse_ruleset(parameters, &cerr)); - - parameters["ruleset-steps"] = "[[\"choose\", \"host\", []]]"; - EXPECT_EQ(ERROR_LRC_RULESET_N, LRC.parse_ruleset(parameters, &cerr)); - - parameters["ruleset-steps"] = "[[\"choose\", \"host\", 2]]"; - EXPECT_EQ(0, LRC.parse_ruleset(parameters, &cerr)); - - const ErasureCodeLRC::Step &step = LRC.ruleset_steps.front(); - EXPECT_EQ("choose", step.op); - EXPECT_EQ("host", step.type); - EXPECT_EQ(2, step.n); - - parameters["ruleset-steps"] = - "[" - " [\"choose\", \"rack\", 2], " - " [\"chooseleaf\", \"host\", 5], " - "]"; - EXPECT_EQ(0, LRC.parse_ruleset(parameters, &cerr)); - EXPECT_EQ(2U, LRC.ruleset_steps.size()); - { - const ErasureCodeLRC::Step &step = 
LRC.ruleset_steps[0]; - EXPECT_EQ("choose", step.op); - EXPECT_EQ("rack", step.type); - EXPECT_EQ(2, step.n); - } - { - const ErasureCodeLRC::Step &step = LRC.ruleset_steps[1]; - EXPECT_EQ("chooseleaf", step.op); - EXPECT_EQ("host", step.type); - EXPECT_EQ(5, step.n); - } -} - -TEST(ErasureCodeTest, create_ruleset) -{ - CrushWrapper *c = new CrushWrapper; - c->create(); - int root_type = 3; - c->set_type_name(root_type, "root"); - int rack_type = 2; - c->set_type_name(rack_type, "rack"); - int host_type = 1; - c->set_type_name(host_type, "host"); - int osd_type = 0; - c->set_type_name(osd_type, "osd"); - - int rootno; - c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, - root_type, 0, NULL, NULL, &rootno); - c->set_item_name(rootno, "default"); - - map loc; - loc["root"] = "default"; - - // - // Set all to 10 so that the item number it trivial to decompose - // into rack/host/osd. - // - int num_rack; - int num_host; - int num_osd; - num_rack = num_host = num_osd = 10; - int osd = 0; - for (int r=0; rinsert_item(g_ceph_context, osd, 1.0, string("osd.") + stringify(osd), loc); - } - } - } - - ErasureCodeLRC LRC; - EXPECT_EQ(0, LRC.create_ruleset("rule1", *c, &cerr)); - - map parameters; - unsigned int racks = 2; - unsigned int hosts = 5; - parameters["ruleset-steps"] = - "[" - " [\"choose\", \"rack\", " + stringify(racks) + "], " - " [\"chooseleaf\", \"host\", " + stringify(hosts) + "], " - "]"; - const char *rule_name = "rule2"; - EXPECT_EQ(0, LRC.parse_ruleset(parameters, &cerr)); - EXPECT_EQ(1, LRC.create_ruleset(rule_name, *c, &cerr)); - - vector<__u32> weight; - for (int o = 0; o < c->get_max_devices(); o++) - weight.push_back(0x10000); - int rule = c->get_rule_id(rule_name); - vector out; - unsigned int n = racks * hosts; - c->do_rule(rule, 1, out, n, weight); - EXPECT_EQ(n, out.size()); - // - // check that the first five are in the same rack and the next five - // in the same rack - // - int first_rack = out[0] / num_host / num_osd; - 
EXPECT_EQ(first_rack, out[1] / num_host / num_osd); - EXPECT_EQ(first_rack, out[2] / num_host / num_osd); - EXPECT_EQ(first_rack, out[3] / num_host / num_osd); - EXPECT_EQ(first_rack, out[4] / num_host / num_osd); - int second_rack = out[5] / num_host / num_osd; - EXPECT_EQ(second_rack, out[6] / num_host / num_osd); - EXPECT_EQ(second_rack, out[7] / num_host / num_osd); - EXPECT_EQ(second_rack, out[8] / num_host / num_osd); - EXPECT_EQ(second_rack, out[9] / num_host / num_osd); -} - -TEST(ErasureCodeLRC, parse_kml) -{ - ErasureCodeLRC LRC; - map parameters; - EXPECT_EQ(0, LRC.parse_kml(parameters, &cerr)); - parameters["k"] = "4"; - EXPECT_EQ(ERROR_LRC_ALL_OR_NOTHING, LRC.parse_kml(parameters, &cerr)); - const char *generated[] = { "mapping", - "layers", - "ruleset-steps" }; - parameters["m"] = "2"; - parameters["l"] = "3"; - - for (int i = 0; i < 3; i++) { - parameters[generated[i]] = "SET"; - EXPECT_EQ(ERROR_LRC_GENERATED, LRC.parse_kml(parameters, &cerr)); - parameters.erase(parameters.find(generated[i])); - } - - parameters["k"] = "4"; - parameters["m"] = "2"; - parameters["l"] = "7"; - EXPECT_EQ(ERROR_LRC_K_M_MODULO, LRC.parse_kml(parameters, &cerr)); - - parameters["k"] = "3"; - parameters["m"] = "3"; - parameters["l"] = "3"; - EXPECT_EQ(ERROR_LRC_K_MODULO, LRC.parse_kml(parameters, &cerr)); - - parameters["k"] = "4"; - parameters["m"] = "2"; - parameters["l"] = "3"; - EXPECT_EQ(0, LRC.parse_kml(parameters, &cerr)); - EXPECT_EQ("[ " - " [ \"DDc_DDc_\", \"\" ]," - " [ \"DDDc____\", \"\" ]," - " [ \"____DDDc\", \"\" ]," - "]", parameters["layers"]); - EXPECT_EQ("DD__DD__", parameters["mapping"]); - EXPECT_EQ("chooseleaf", LRC.ruleset_steps[0].op); - EXPECT_EQ("host", LRC.ruleset_steps[0].type); - EXPECT_EQ(0, LRC.ruleset_steps[0].n); - EXPECT_EQ(1U, LRC.ruleset_steps.size()); - parameters.erase(parameters.find("mapping")); - parameters.erase(parameters.find("layers")); - - parameters["k"] = "4"; - parameters["m"] = "2"; - parameters["l"] = "3"; - 
parameters["ruleset-failure-domain"] = "osd"; - EXPECT_EQ(0, LRC.parse_kml(parameters, &cerr)); - EXPECT_EQ("chooseleaf", LRC.ruleset_steps[0].op); - EXPECT_EQ("osd", LRC.ruleset_steps[0].type); - EXPECT_EQ(0, LRC.ruleset_steps[0].n); - EXPECT_EQ(1U, LRC.ruleset_steps.size()); - parameters.erase(parameters.find("mapping")); - parameters.erase(parameters.find("layers")); - - parameters["k"] = "4"; - parameters["m"] = "2"; - parameters["l"] = "3"; - parameters["ruleset-failure-domain"] = "osd"; - parameters["ruleset-locality"] = "rack"; - EXPECT_EQ(0, LRC.parse_kml(parameters, &cerr)); - EXPECT_EQ("choose", LRC.ruleset_steps[0].op); - EXPECT_EQ("rack", LRC.ruleset_steps[0].type); - EXPECT_EQ(2, LRC.ruleset_steps[0].n); - EXPECT_EQ("chooseleaf", LRC.ruleset_steps[1].op); - EXPECT_EQ("osd", LRC.ruleset_steps[1].type); - EXPECT_EQ(4, LRC.ruleset_steps[1].n); - EXPECT_EQ(2U, LRC.ruleset_steps.size()); - parameters.erase(parameters.find("mapping")); - parameters.erase(parameters.find("layers")); -} - -TEST(ErasureCodeLRC, layers_description) -{ - ErasureCodeLRC LRC; - map parameters; - - json_spirit::mArray description; - EXPECT_EQ(ERROR_LRC_DESCRIPTION, - LRC.layers_description(parameters, &description, &cerr)); - - { - const char *description_string = "\"not an array\""; - parameters["layers"] = description_string; - EXPECT_EQ(ERROR_LRC_ARRAY, - LRC.layers_description(parameters, &description, &cerr)); - } - { - const char *description_string = "invalid json"; - parameters["layers"] = description_string; - EXPECT_EQ(ERROR_LRC_PARSE_JSON, - LRC.layers_description(parameters, &description, &cerr)); - } - { - const char *description_string = "[]"; - parameters["layers"] = description_string; - EXPECT_EQ(0, LRC.layers_description(parameters, &description, &cerr)); - } -} - -TEST(ErasureCodeLRC, layers_parse) -{ - { - ErasureCodeLRC LRC; - map parameters; - - const char *description_string ="[ 0 ]"; - parameters["layers"] = description_string; - json_spirit::mArray 
description; - EXPECT_EQ(0, LRC.layers_description(parameters, &description, &cerr)); - EXPECT_EQ(ERROR_LRC_ARRAY, - LRC.layers_parse(description_string, description, &cerr)); - } - - { - ErasureCodeLRC LRC; - map parameters; - - const char *description_string ="[ [ 0 ] ]"; - parameters["layers"] = description_string; - json_spirit::mArray description; - EXPECT_EQ(0, LRC.layers_description(parameters, &description, &cerr)); - EXPECT_EQ(ERROR_LRC_STR, - LRC.layers_parse(description_string, description, &cerr)); - } - - { - ErasureCodeLRC LRC; - map parameters; - - const char *description_string ="[ [ \"\", 0 ] ]"; - parameters["layers"] = description_string; - json_spirit::mArray description; - EXPECT_EQ(0, LRC.layers_description(parameters, &description, &cerr)); - EXPECT_EQ(ERROR_LRC_CONFIG_OPTIONS, - LRC.layers_parse(description_string, description, &cerr)); - } - - // - // The second element can be an object describing the plugin - // parameters. - // - { - ErasureCodeLRC LRC; - map parameters; - - const char *description_string ="[ [ \"\", { \"a\": \"b\" }, \"ignored\" ] ]"; - parameters["layers"] = description_string; - json_spirit::mArray description; - EXPECT_EQ(0, LRC.layers_description(parameters, &description, &cerr)); - EXPECT_EQ(0, LRC.layers_parse(description_string, description, &cerr)); - EXPECT_EQ("b", LRC.layers.front().parameters["a"]); - } - - // - // The second element can be a str_map parseable string describing the plugin - // parameters. 
- // - { - ErasureCodeLRC LRC; - map parameters; - - const char *description_string ="[ [ \"\", \"a=b c=d\" ] ]"; - parameters["layers"] = description_string; - json_spirit::mArray description; - EXPECT_EQ(0, LRC.layers_description(parameters, &description, &cerr)); - EXPECT_EQ(0, LRC.layers_parse(description_string, description, &cerr)); - EXPECT_EQ("b", LRC.layers.front().parameters["a"]); - EXPECT_EQ("d", LRC.layers.front().parameters["c"]); - } - -} - -TEST(ErasureCodeLRC, layers_sanity_checks) -{ - { - ErasureCodeLRC LRC; - map parameters; - parameters["mapping"] = - "__DDD__DD"; - parameters["directory"] = ".libs"; - const char *description_string = - "[ " - " [ \"_cDDD_cDD\", \"\" ]," - " [ \"c_DDD____\", \"\" ]," - " [ \"_____cDDD\", \"\" ]," - "]"; - parameters["layers"] = description_string; - EXPECT_EQ(0, LRC.init(parameters, &cerr)); - } - { - ErasureCodeLRC LRC; - map parameters; - const char *description_string = - "[ " - "]"; - parameters["layers"] = description_string; - EXPECT_EQ(ERROR_LRC_MAPPING, LRC.init(parameters, &cerr)); - } - { - ErasureCodeLRC LRC; - map parameters; - parameters["mapping"] = ""; - const char *description_string = - "[ " - "]"; - parameters["layers"] = description_string; - EXPECT_EQ(ERROR_LRC_LAYERS_COUNT, LRC.init(parameters, &cerr)); - } - { - ErasureCodeLRC LRC; - map parameters; - parameters["directory"] = ".libs"; - parameters["mapping"] = - "AA"; - const char *description_string = - "[ " - " [ \"AA??\", \"\" ], " - " [ \"AA\", \"\" ], " - " [ \"AA\", \"\" ], " - "]"; - parameters["layers"] = description_string; - EXPECT_EQ(ERROR_LRC_MAPPING_SIZE, LRC.init(parameters, &cerr)); - } -} - -TEST(ErasureCodeLRC, layers_init) -{ - { - ErasureCodeLRC LRC; - map parameters; - - const char *description_string = - "[ " - " [ \"_cDDD_cDD_\", \"directory=.libs\" ]," - "]"; - parameters["layers"] = description_string; - parameters["directory"] = ".libs"; - json_spirit::mArray description; - EXPECT_EQ(0, 
LRC.layers_description(parameters, &description, &cerr)); - EXPECT_EQ(0, LRC.layers_parse(description_string, description, &cerr)); - EXPECT_EQ(0, LRC.layers_init()); - EXPECT_EQ("5", LRC.layers.front().parameters["k"]); - EXPECT_EQ("2", LRC.layers.front().parameters["m"]); - EXPECT_EQ("jerasure", LRC.layers.front().parameters["plugin"]); - EXPECT_EQ("reed_sol_van", LRC.layers.front().parameters["technique"]); - } -} - -TEST(ErasureCodeLRC, init) -{ - ErasureCodeLRC LRC; - map parameters; - parameters["mapping"] = - "__DDD__DD"; - const char *description_string = - "[ " - " [ \"_cDDD_cDD\", \"\" ]," - " [ \"c_DDD____\", \"\" ]," - " [ \"_____cDDD\", \"\" ]," - "]"; - parameters["layers"] = description_string; - parameters["directory"] = ".libs"; - EXPECT_EQ(0, LRC.init(parameters, &cerr)); -} - -TEST(ErasureCodeLRC, init_kml) -{ - ErasureCodeLRC LRC; - map parameters; - parameters["k"] = "4"; - parameters["m"] = "2"; - parameters["l"] = "3"; - parameters["directory"] = ".libs"; - EXPECT_EQ(0, LRC.init(parameters, &cerr)); - EXPECT_EQ((unsigned int)(4 + 2 + (4 + 2) / 3), LRC.get_chunk_count()); -} - -TEST(ErasureCodeLRC, minimum_to_decode) -{ - // trivial : no erasures, the minimum is want_to_read - { - ErasureCodeLRC LRC; - map parameters; - parameters["mapping"] = - "__DDD__DD"; - const char *description_string = - "[ " - " [ \"_cDDD_cDD\", \"\" ]," - " [ \"c_DDD____\", \"\" ]," - " [ \"_____cDDD\", \"\" ]," - "]"; - parameters["layers"] = description_string; - parameters["directory"] = ".libs"; - EXPECT_EQ(0, LRC.init(parameters, &cerr)); - set want_to_read; - want_to_read.insert(1); - set available_chunks; - available_chunks.insert(1); - available_chunks.insert(2); - set minimum; - EXPECT_EQ(0, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - EXPECT_EQ(want_to_read, minimum); - } - // locally repairable erasure - { - ErasureCodeLRC LRC; - map parameters; - parameters["mapping"] = - "__DDD__DD_"; - const char *description_string = - "[ " - " [ 
\"_cDDD_cDD_\", \"\" ]," - " [ \"c_DDD_____\", \"\" ]," - " [ \"_____cDDD_\", \"\" ]," - " [ \"_____DDDDc\", \"\" ]," - "]"; - parameters["layers"] = description_string; - parameters["directory"] = ".libs"; - EXPECT_EQ(0, LRC.init(parameters, &cerr)); - EXPECT_EQ(parameters["mapping"].length(), - LRC.get_chunk_count()); - { - // want to read the last chunk - set want_to_read; - want_to_read.insert(LRC.get_chunk_count() - 1); - // all chunks are available except the last chunk - set available_chunks; - for (int i = 0; i < (int)LRC.get_chunk_count() - 1; i++) - available_chunks.insert(i); - // _____DDDDc can recover c - set minimum; - EXPECT_EQ(0, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - set expected_minimum; - expected_minimum.insert(5); - expected_minimum.insert(6); - expected_minimum.insert(7); - expected_minimum.insert(8); - EXPECT_EQ(expected_minimum, minimum); - } - { - set want_to_read; - want_to_read.insert(0); - set available_chunks; - for (int i = 1; i < (int)LRC.get_chunk_count(); i++) - available_chunks.insert(i); - set minimum; - EXPECT_EQ(0, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - set expected_minimum; - expected_minimum.insert(2); - expected_minimum.insert(3); - expected_minimum.insert(4); - EXPECT_EQ(expected_minimum, minimum); - } - } - // implicit parity required - { - ErasureCodeLRC LRC; - map parameters; - parameters["mapping"] = - "__DDD__DD"; - const char *description_string = - "[ " - " [ \"_cDDD_cDD\", \"\" ]," - " [ \"c_DDD____\", \"\" ]," - " [ \"_____cDDD\", \"\" ]," - "]"; - parameters["layers"] = description_string; - parameters["directory"] = ".libs"; - EXPECT_EQ(0, LRC.init(parameters, &cerr)); - EXPECT_EQ(parameters["mapping"].length(), - LRC.get_chunk_count()); - set want_to_read; - want_to_read.insert(8); - // - // unable to recover, too many chunks missing - // - { - set available_chunks; - available_chunks.insert(0); - available_chunks.insert(1); - // missing (2) - // missing 
(3) - available_chunks.insert(4); - available_chunks.insert(5); - available_chunks.insert(6); - // missing (7) - // missing (8) - set minimum; - EXPECT_EQ(-EIO, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - } - // - // We want to read chunk 8 and encoding was done with - // - // _cDDD_cDD - // c_DDD____ - // _____cDDD - // - // First strategy fails: - // - // 012345678 - // xxXXXxxXX initial chunks - // xx.XXxx.. missing (2, 7, 8) - // _____cDDD fail : can recover 1 but 2 are missing - // c_DDD____ ignored because 8 is not used (i.e. _) - // _cDDD_cDD fail : can recover 2 but 3 are missing - // - // Second strategy succeeds: - // - // 012345678 - // xxXXXxxXX initial chunks - // xx.XXxx.. missing (2, 7, 8) - // _____cDDD fail : can recover 1 but 2 are missing - // c_DDD____ success: recovers chunk 2 - // _cDDD_cDD success: recovers chunk 7, 8 - // - { - set available_chunks; - available_chunks.insert(0); - available_chunks.insert(1); - // missing (2) - available_chunks.insert(3); - available_chunks.insert(4); - available_chunks.insert(5); - available_chunks.insert(6); - // missing (7) - // missing (8) - set minimum; - EXPECT_EQ(0, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - EXPECT_EQ(available_chunks, minimum); - } - } -} - -TEST(ErasureCodeLRC, encode_decode) -{ - ErasureCodeLRC LRC; - map parameters; - parameters["mapping"] = - "__DD__DD"; - const char *description_string = - "[ " - " [ \"_cDD_cDD\", \"\" ]," // global layer - " [ \"c_DD____\", \"\" ]," // first local layer - " [ \"____cDDD\", \"\" ]," // second local layer - "]"; - parameters["layers"] = description_string; - parameters["directory"] = ".libs"; - EXPECT_EQ(0, LRC.init(parameters, &cerr)); - EXPECT_EQ(4U, LRC.get_data_chunk_count()); - unsigned int stripe_width = g_conf->osd_pool_erasure_code_stripe_width; - unsigned int chunk_size = stripe_width / LRC.get_data_chunk_count(); - EXPECT_EQ(chunk_size, LRC.get_chunk_size(stripe_width)); - set 
want_to_encode; - map encoded; - for (unsigned int i = 0; i < LRC.get_chunk_count(); ++i) { - want_to_encode.insert(i); - bufferptr ptr(buffer::create_page_aligned(chunk_size)); - encoded[i].push_front(ptr); - } - const vector &mapping = LRC.get_chunk_mapping(); - char c = 'A'; - for (unsigned int i = 0; i < LRC.get_data_chunk_count(); i++) { - int j = mapping[i]; - string s(chunk_size, c); - encoded[j].clear(); - encoded[j].append(s); - c++; - } - EXPECT_EQ(0, LRC.encode_chunks(want_to_encode, &encoded)); - - { - map chunks; - chunks[4] = encoded[4]; - chunks[5] = encoded[5]; - chunks[6] = encoded[6]; - set want_to_read; - want_to_read.insert(7); - set available_chunks; - available_chunks.insert(4); - available_chunks.insert(5); - available_chunks.insert(6); - set minimum; - EXPECT_EQ(0, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - // only need three chunks from the second local layer - EXPECT_EQ(3U, minimum.size()); - EXPECT_EQ(1U, minimum.count(4)); - EXPECT_EQ(1U, minimum.count(5)); - EXPECT_EQ(1U, minimum.count(6)); - map decoded; - EXPECT_EQ(0, LRC.decode(want_to_read, chunks, &decoded)); - string s(chunk_size, 'D'); - EXPECT_EQ(s, string(decoded[7].c_str(), chunk_size)); - } - { - set want_to_read; - want_to_read.insert(2); - map chunks; - chunks[1] = encoded[1]; - chunks[3] = encoded[3]; - chunks[5] = encoded[5]; - chunks[6] = encoded[6]; - chunks[7] = encoded[7]; - set available_chunks; - available_chunks.insert(1); - available_chunks.insert(3); - available_chunks.insert(5); - available_chunks.insert(6); - available_chunks.insert(7); - set minimum; - EXPECT_EQ(0, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - EXPECT_EQ(5U, minimum.size()); - EXPECT_EQ(available_chunks, minimum); - - map decoded; - EXPECT_EQ(0, LRC.decode(want_to_read, encoded, &decoded)); - string s(chunk_size, 'A'); - EXPECT_EQ(s, string(decoded[2].c_str(), chunk_size)); - } - { - set want_to_read; - want_to_read.insert(3); - 
want_to_read.insert(6); - want_to_read.insert(7); - set available_chunks; - available_chunks.insert(0); - available_chunks.insert(1); - available_chunks.insert(2); - // available_chunks.insert(3); - available_chunks.insert(4); - available_chunks.insert(5); - // available_chunks.insert(6); - // available_chunks.insert(7); - encoded.erase(3); - encoded.erase(6); - set minimum; - EXPECT_EQ(0, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - EXPECT_EQ(4U, minimum.size()); - // only need two chunks from the first local layer - EXPECT_EQ(1U, minimum.count(0)); - EXPECT_EQ(1U, minimum.count(2)); - // the above chunks will rebuild chunk 3 and the global layer only needs - // three more chunks to reach the required amount of chunks (4) to recover - // the last two - EXPECT_EQ(1U, minimum.count(1)); - EXPECT_EQ(1U, minimum.count(2)); - EXPECT_EQ(1U, minimum.count(5)); - - map decoded; - EXPECT_EQ(0, LRC.decode(want_to_read, encoded, &decoded)); - { - string s(chunk_size, 'B'); - EXPECT_EQ(s, string(decoded[3].c_str(), chunk_size)); - } - { - string s(chunk_size, 'C'); - EXPECT_EQ(s, string(decoded[6].c_str(), chunk_size)); - } - { - string s(chunk_size, 'D'); - EXPECT_EQ(s, string(decoded[7].c_str(), chunk_size)); - } - } -} - -TEST(ErasureCodeLRC, encode_decode_2) -{ - ErasureCodeLRC LRC; - map parameters; - parameters["mapping"] = - "DD__DD__"; - const char *description_string = - "[ " - " [ \"DDc_DDc_\", \"\" ]," - " [ \"DDDc____\", \"\" ]," - " [ \"____DDDc\", \"\" ]," - "]"; - parameters["layers"] = description_string; - parameters["directory"] = ".libs"; - EXPECT_EQ(0, LRC.init(parameters, &cerr)); - EXPECT_EQ(4U, LRC.get_data_chunk_count()); - unsigned int stripe_width = g_conf->osd_pool_erasure_code_stripe_width; - unsigned int chunk_size = stripe_width / LRC.get_data_chunk_count(); - EXPECT_EQ(chunk_size, LRC.get_chunk_size(stripe_width)); - set want_to_encode; - map encoded; - for (unsigned int i = 0; i < LRC.get_chunk_count(); ++i) { - 
want_to_encode.insert(i); - bufferptr ptr(buffer::create_page_aligned(chunk_size)); - encoded[i].push_front(ptr); - } - const vector &mapping = LRC.get_chunk_mapping(); - char c = 'A'; - for (unsigned int i = 0; i < LRC.get_data_chunk_count(); i++) { - int j = mapping[i]; - string s(chunk_size, c); - encoded[j].clear(); - encoded[j].append(s); - c++; - } - EXPECT_EQ(0, LRC.encode_chunks(want_to_encode, &encoded)); - - { - set want_to_read; - want_to_read.insert(0); - map chunks; - chunks[1] = encoded[1]; - chunks[3] = encoded[3]; - chunks[4] = encoded[4]; - chunks[5] = encoded[5]; - chunks[6] = encoded[6]; - chunks[7] = encoded[7]; - set available_chunks; - available_chunks.insert(1); - available_chunks.insert(3); - available_chunks.insert(4); - available_chunks.insert(5); - available_chunks.insert(6); - available_chunks.insert(7); - set minimum; - EXPECT_EQ(0, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - EXPECT_EQ(4U, minimum.size()); - EXPECT_EQ(1U, minimum.count(1)); - EXPECT_EQ(1U, minimum.count(4)); - EXPECT_EQ(1U, minimum.count(5)); - EXPECT_EQ(1U, minimum.count(6)); - - map decoded; - EXPECT_EQ(0, LRC.decode(want_to_read, chunks, &decoded)); - string s(chunk_size, 'A'); - EXPECT_EQ(s, string(decoded[0].c_str(), chunk_size)); - } - { - set want_to_read; - for (unsigned int i = 0; i < LRC.get_chunk_count(); i++) - want_to_read.insert(i); - map chunks; - chunks[1] = encoded[1]; - chunks[3] = encoded[3]; - chunks[5] = encoded[5]; - chunks[6] = encoded[6]; - chunks[7] = encoded[7]; - set available_chunks; - available_chunks.insert(1); - available_chunks.insert(3); - available_chunks.insert(5); - available_chunks.insert(6); - available_chunks.insert(7); - set minimum; - EXPECT_EQ(0, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - EXPECT_EQ(5U, minimum.size()); - EXPECT_EQ(1U, minimum.count(1)); - EXPECT_EQ(1U, minimum.count(3)); - EXPECT_EQ(1U, minimum.count(5)); - EXPECT_EQ(1U, minimum.count(6)); - EXPECT_EQ(1U, 
minimum.count(7)); - - map decoded; - EXPECT_EQ(0, LRC.decode(want_to_read, chunks, &decoded)); - { - string s(chunk_size, 'A'); - EXPECT_EQ(s, string(decoded[0].c_str(), chunk_size)); - } - { - string s(chunk_size, 'B'); - EXPECT_EQ(s, string(decoded[1].c_str(), chunk_size)); - } - { - string s(chunk_size, 'C'); - EXPECT_EQ(s, string(decoded[4].c_str(), chunk_size)); - } - { - string s(chunk_size, 'D'); - EXPECT_EQ(s, string(decoded[5].c_str(), chunk_size)); - } - } - { - set want_to_read; - for (unsigned int i = 0; i < LRC.get_chunk_count(); i++) - want_to_read.insert(i); - map chunks; - chunks[1] = encoded[1]; - chunks[3] = encoded[3]; - chunks[5] = encoded[5]; - chunks[6] = encoded[6]; - chunks[7] = encoded[7]; - set available_chunks; - available_chunks.insert(1); - available_chunks.insert(3); - available_chunks.insert(5); - available_chunks.insert(6); - available_chunks.insert(7); - set minimum; - EXPECT_EQ(0, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - EXPECT_EQ(5U, minimum.size()); - EXPECT_EQ(1U, minimum.count(1)); - EXPECT_EQ(1U, minimum.count(3)); - EXPECT_EQ(1U, minimum.count(5)); - EXPECT_EQ(1U, minimum.count(6)); - EXPECT_EQ(1U, minimum.count(7)); - - map decoded; - EXPECT_EQ(0, LRC.decode(want_to_read, chunks, &decoded)); - { - string s(chunk_size, 'A'); - EXPECT_EQ(s, string(decoded[0].c_str(), chunk_size)); - } - { - string s(chunk_size, 'B'); - EXPECT_EQ(s, string(decoded[1].c_str(), chunk_size)); - } - { - string s(chunk_size, 'C'); - EXPECT_EQ(s, string(decoded[4].c_str(), chunk_size)); - } - { - string s(chunk_size, 'D'); - EXPECT_EQ(s, string(decoded[5].c_str(), chunk_size)); - } - } - { - set want_to_read; - want_to_read.insert(6); - map chunks; - chunks[0] = encoded[0]; - chunks[1] = encoded[1]; - chunks[3] = encoded[3]; - chunks[5] = encoded[5]; - chunks[7] = encoded[7]; - set available_chunks; - available_chunks.insert(0); - available_chunks.insert(1); - available_chunks.insert(3); - available_chunks.insert(5); - 
available_chunks.insert(7); - set minimum; - EXPECT_EQ(0, LRC.minimum_to_decode(want_to_read, available_chunks, &minimum)); - EXPECT_EQ(available_chunks, minimum); - - map decoded; - EXPECT_EQ(0, LRC.decode(want_to_read, chunks, &decoded)); - } -} - -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - -/* - * Local Variables: - * compile-command: "cd ../.. ; - * make -j4 && valgrind --tool=memcheck --leak-check=full \ - * ./unittest_erasure_code_LRC \ - * --gtest_filter=*.* --log-to-stderr=true --debug-osd=20" - * End: - */ diff --git a/src/test/erasure-code/TestErasureCodeLrc.cc b/src/test/erasure-code/TestErasureCodeLrc.cc new file mode 100644 index 000000000000..c3f0e68cb06d --- /dev/null +++ b/src/test/erasure-code/TestErasureCodeLrc.cc @@ -0,0 +1,936 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph distributed storage system + * + * Copyright (C) 2014 Cloudwatt + * Copyright (C) 2014 Red Hat + * + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + */ + +#include + +#include "crush/CrushWrapper.h" +#include "common/config.h" +#include "include/stringify.h" +#include "global/global_init.h" +#include "erasure-code/lrc/ErasureCodeLrc.h" +#include "common/ceph_argparse.h" +#include "global/global_context.h" +#include "gtest/gtest.h" + +TEST(ErasureCodeLrc, parse_ruleset) +{ + ErasureCodeLrc lrc; + EXPECT_EQ("default", lrc.ruleset_root); + EXPECT_EQ("host", lrc.ruleset_steps.front().type); + + map parameters; + parameters["ruleset-root"] = "other"; + EXPECT_EQ(0, lrc.parse_ruleset(parameters, &cerr)); + EXPECT_EQ("other", lrc.ruleset_root); + + parameters["ruleset-steps"] = "[]"; + EXPECT_EQ(0, lrc.parse_ruleset(parameters, &cerr)); + EXPECT_TRUE(lrc.ruleset_steps.empty()); + + parameters["ruleset-steps"] = "0"; + EXPECT_EQ(ERROR_LRC_ARRAY, lrc.parse_ruleset(parameters, &cerr)); + + parameters["ruleset-steps"] = "{"; + EXPECT_EQ(ERROR_LRC_PARSE_JSON, lrc.parse_ruleset(parameters, &cerr)); + + parameters["ruleset-steps"] = "[0]"; + EXPECT_EQ(ERROR_LRC_ARRAY, lrc.parse_ruleset(parameters, &cerr)); + + parameters["ruleset-steps"] = "[[0]]"; + EXPECT_EQ(ERROR_LRC_RULESET_OP, lrc.parse_ruleset(parameters, &cerr)); + + parameters["ruleset-steps"] = "[[\"choose\", 0]]"; + EXPECT_EQ(ERROR_LRC_RULESET_TYPE, lrc.parse_ruleset(parameters, &cerr)); + + parameters["ruleset-steps"] = "[[\"choose\", \"host\", []]]"; + EXPECT_EQ(ERROR_LRC_RULESET_N, lrc.parse_ruleset(parameters, &cerr)); + + parameters["ruleset-steps"] = "[[\"choose\", \"host\", 2]]"; + EXPECT_EQ(0, lrc.parse_ruleset(parameters, &cerr)); + + const ErasureCodeLrc::Step &step = lrc.ruleset_steps.front(); + EXPECT_EQ("choose", step.op); + EXPECT_EQ("host", step.type); + EXPECT_EQ(2, step.n); + + parameters["ruleset-steps"] = + "[" + " [\"choose\", \"rack\", 2], " + " [\"chooseleaf\", \"host\", 5], " + "]"; + EXPECT_EQ(0, lrc.parse_ruleset(parameters, &cerr)); + EXPECT_EQ(2U, lrc.ruleset_steps.size()); + { + const ErasureCodeLrc::Step &step = 
lrc.ruleset_steps[0]; + EXPECT_EQ("choose", step.op); + EXPECT_EQ("rack", step.type); + EXPECT_EQ(2, step.n); + } + { + const ErasureCodeLrc::Step &step = lrc.ruleset_steps[1]; + EXPECT_EQ("chooseleaf", step.op); + EXPECT_EQ("host", step.type); + EXPECT_EQ(5, step.n); + } +} + +TEST(ErasureCodeTest, create_ruleset) +{ + CrushWrapper *c = new CrushWrapper; + c->create(); + int root_type = 3; + c->set_type_name(root_type, "root"); + int rack_type = 2; + c->set_type_name(rack_type, "rack"); + int host_type = 1; + c->set_type_name(host_type, "host"); + int osd_type = 0; + c->set_type_name(osd_type, "osd"); + + int rootno; + c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + root_type, 0, NULL, NULL, &rootno); + c->set_item_name(rootno, "default"); + + map loc; + loc["root"] = "default"; + + // + // Set all to 10 so that the item number it trivial to decompose + // into rack/host/osd. + // + int num_rack; + int num_host; + int num_osd; + num_rack = num_host = num_osd = 10; + int osd = 0; + for (int r=0; rinsert_item(g_ceph_context, osd, 1.0, string("osd.") + stringify(osd), loc); + } + } + } + + ErasureCodeLrc lrc; + EXPECT_EQ(0, lrc.create_ruleset("rule1", *c, &cerr)); + + map parameters; + unsigned int racks = 2; + unsigned int hosts = 5; + parameters["ruleset-steps"] = + "[" + " [\"choose\", \"rack\", " + stringify(racks) + "], " + " [\"chooseleaf\", \"host\", " + stringify(hosts) + "], " + "]"; + const char *rule_name = "rule2"; + EXPECT_EQ(0, lrc.parse_ruleset(parameters, &cerr)); + EXPECT_EQ(1, lrc.create_ruleset(rule_name, *c, &cerr)); + + vector<__u32> weight; + for (int o = 0; o < c->get_max_devices(); o++) + weight.push_back(0x10000); + int rule = c->get_rule_id(rule_name); + vector out; + unsigned int n = racks * hosts; + c->do_rule(rule, 1, out, n, weight); + EXPECT_EQ(n, out.size()); + // + // check that the first five are in the same rack and the next five + // in the same rack + // + int first_rack = out[0] / num_host / num_osd; + 
EXPECT_EQ(first_rack, out[1] / num_host / num_osd); + EXPECT_EQ(first_rack, out[2] / num_host / num_osd); + EXPECT_EQ(first_rack, out[3] / num_host / num_osd); + EXPECT_EQ(first_rack, out[4] / num_host / num_osd); + int second_rack = out[5] / num_host / num_osd; + EXPECT_EQ(second_rack, out[6] / num_host / num_osd); + EXPECT_EQ(second_rack, out[7] / num_host / num_osd); + EXPECT_EQ(second_rack, out[8] / num_host / num_osd); + EXPECT_EQ(second_rack, out[9] / num_host / num_osd); +} + +TEST(ErasureCodeLrc, parse_kml) +{ + ErasureCodeLrc lrc; + map parameters; + EXPECT_EQ(0, lrc.parse_kml(parameters, &cerr)); + parameters["k"] = "4"; + EXPECT_EQ(ERROR_LRC_ALL_OR_NOTHING, lrc.parse_kml(parameters, &cerr)); + const char *generated[] = { "mapping", + "layers", + "ruleset-steps" }; + parameters["m"] = "2"; + parameters["l"] = "3"; + + for (int i = 0; i < 3; i++) { + parameters[generated[i]] = "SET"; + EXPECT_EQ(ERROR_LRC_GENERATED, lrc.parse_kml(parameters, &cerr)); + parameters.erase(parameters.find(generated[i])); + } + + parameters["k"] = "4"; + parameters["m"] = "2"; + parameters["l"] = "7"; + EXPECT_EQ(ERROR_LRC_K_M_MODULO, lrc.parse_kml(parameters, &cerr)); + + parameters["k"] = "3"; + parameters["m"] = "3"; + parameters["l"] = "3"; + EXPECT_EQ(ERROR_LRC_K_MODULO, lrc.parse_kml(parameters, &cerr)); + + parameters["k"] = "4"; + parameters["m"] = "2"; + parameters["l"] = "3"; + EXPECT_EQ(0, lrc.parse_kml(parameters, &cerr)); + EXPECT_EQ("[ " + " [ \"DDc_DDc_\", \"\" ]," + " [ \"DDDc____\", \"\" ]," + " [ \"____DDDc\", \"\" ]," + "]", parameters["layers"]); + EXPECT_EQ("DD__DD__", parameters["mapping"]); + EXPECT_EQ("chooseleaf", lrc.ruleset_steps[0].op); + EXPECT_EQ("host", lrc.ruleset_steps[0].type); + EXPECT_EQ(0, lrc.ruleset_steps[0].n); + EXPECT_EQ(1U, lrc.ruleset_steps.size()); + parameters.erase(parameters.find("mapping")); + parameters.erase(parameters.find("layers")); + + parameters["k"] = "4"; + parameters["m"] = "2"; + parameters["l"] = "3"; + 
parameters["ruleset-failure-domain"] = "osd"; + EXPECT_EQ(0, lrc.parse_kml(parameters, &cerr)); + EXPECT_EQ("chooseleaf", lrc.ruleset_steps[0].op); + EXPECT_EQ("osd", lrc.ruleset_steps[0].type); + EXPECT_EQ(0, lrc.ruleset_steps[0].n); + EXPECT_EQ(1U, lrc.ruleset_steps.size()); + parameters.erase(parameters.find("mapping")); + parameters.erase(parameters.find("layers")); + + parameters["k"] = "4"; + parameters["m"] = "2"; + parameters["l"] = "3"; + parameters["ruleset-failure-domain"] = "osd"; + parameters["ruleset-locality"] = "rack"; + EXPECT_EQ(0, lrc.parse_kml(parameters, &cerr)); + EXPECT_EQ("choose", lrc.ruleset_steps[0].op); + EXPECT_EQ("rack", lrc.ruleset_steps[0].type); + EXPECT_EQ(2, lrc.ruleset_steps[0].n); + EXPECT_EQ("chooseleaf", lrc.ruleset_steps[1].op); + EXPECT_EQ("osd", lrc.ruleset_steps[1].type); + EXPECT_EQ(4, lrc.ruleset_steps[1].n); + EXPECT_EQ(2U, lrc.ruleset_steps.size()); + parameters.erase(parameters.find("mapping")); + parameters.erase(parameters.find("layers")); +} + +TEST(ErasureCodeLrc, layers_description) +{ + ErasureCodeLrc lrc; + map parameters; + + json_spirit::mArray description; + EXPECT_EQ(ERROR_LRC_DESCRIPTION, + lrc.layers_description(parameters, &description, &cerr)); + + { + const char *description_string = "\"not an array\""; + parameters["layers"] = description_string; + EXPECT_EQ(ERROR_LRC_ARRAY, + lrc.layers_description(parameters, &description, &cerr)); + } + { + const char *description_string = "invalid json"; + parameters["layers"] = description_string; + EXPECT_EQ(ERROR_LRC_PARSE_JSON, + lrc.layers_description(parameters, &description, &cerr)); + } + { + const char *description_string = "[]"; + parameters["layers"] = description_string; + EXPECT_EQ(0, lrc.layers_description(parameters, &description, &cerr)); + } +} + +TEST(ErasureCodeLrc, layers_parse) +{ + { + ErasureCodeLrc lrc; + map parameters; + + const char *description_string ="[ 0 ]"; + parameters["layers"] = description_string; + json_spirit::mArray 
description; + EXPECT_EQ(0, lrc.layers_description(parameters, &description, &cerr)); + EXPECT_EQ(ERROR_LRC_ARRAY, + lrc.layers_parse(description_string, description, &cerr)); + } + + { + ErasureCodeLrc lrc; + map parameters; + + const char *description_string ="[ [ 0 ] ]"; + parameters["layers"] = description_string; + json_spirit::mArray description; + EXPECT_EQ(0, lrc.layers_description(parameters, &description, &cerr)); + EXPECT_EQ(ERROR_LRC_STR, + lrc.layers_parse(description_string, description, &cerr)); + } + + { + ErasureCodeLrc lrc; + map parameters; + + const char *description_string ="[ [ \"\", 0 ] ]"; + parameters["layers"] = description_string; + json_spirit::mArray description; + EXPECT_EQ(0, lrc.layers_description(parameters, &description, &cerr)); + EXPECT_EQ(ERROR_LRC_CONFIG_OPTIONS, + lrc.layers_parse(description_string, description, &cerr)); + } + + // + // The second element can be an object describing the plugin + // parameters. + // + { + ErasureCodeLrc lrc; + map parameters; + + const char *description_string ="[ [ \"\", { \"a\": \"b\" }, \"ignored\" ] ]"; + parameters["layers"] = description_string; + json_spirit::mArray description; + EXPECT_EQ(0, lrc.layers_description(parameters, &description, &cerr)); + EXPECT_EQ(0, lrc.layers_parse(description_string, description, &cerr)); + EXPECT_EQ("b", lrc.layers.front().parameters["a"]); + } + + // + // The second element can be a str_map parseable string describing the plugin + // parameters. 
+ // + { + ErasureCodeLrc lrc; + map parameters; + + const char *description_string ="[ [ \"\", \"a=b c=d\" ] ]"; + parameters["layers"] = description_string; + json_spirit::mArray description; + EXPECT_EQ(0, lrc.layers_description(parameters, &description, &cerr)); + EXPECT_EQ(0, lrc.layers_parse(description_string, description, &cerr)); + EXPECT_EQ("b", lrc.layers.front().parameters["a"]); + EXPECT_EQ("d", lrc.layers.front().parameters["c"]); + } + +} + +TEST(ErasureCodeLrc, layers_sanity_checks) +{ + { + ErasureCodeLrc lrc; + map parameters; + parameters["mapping"] = + "__DDD__DD"; + parameters["directory"] = ".libs"; + const char *description_string = + "[ " + " [ \"_cDDD_cDD\", \"\" ]," + " [ \"c_DDD____\", \"\" ]," + " [ \"_____cDDD\", \"\" ]," + "]"; + parameters["layers"] = description_string; + EXPECT_EQ(0, lrc.init(parameters, &cerr)); + } + { + ErasureCodeLrc lrc; + map parameters; + const char *description_string = + "[ " + "]"; + parameters["layers"] = description_string; + EXPECT_EQ(ERROR_LRC_MAPPING, lrc.init(parameters, &cerr)); + } + { + ErasureCodeLrc lrc; + map parameters; + parameters["mapping"] = ""; + const char *description_string = + "[ " + "]"; + parameters["layers"] = description_string; + EXPECT_EQ(ERROR_LRC_LAYERS_COUNT, lrc.init(parameters, &cerr)); + } + { + ErasureCodeLrc lrc; + map parameters; + parameters["directory"] = ".libs"; + parameters["mapping"] = + "AA"; + const char *description_string = + "[ " + " [ \"AA??\", \"\" ], " + " [ \"AA\", \"\" ], " + " [ \"AA\", \"\" ], " + "]"; + parameters["layers"] = description_string; + EXPECT_EQ(ERROR_LRC_MAPPING_SIZE, lrc.init(parameters, &cerr)); + } +} + +TEST(ErasureCodeLrc, layers_init) +{ + { + ErasureCodeLrc lrc; + map parameters; + + const char *description_string = + "[ " + " [ \"_cDDD_cDD_\", \"directory=.libs\" ]," + "]"; + parameters["layers"] = description_string; + parameters["directory"] = ".libs"; + json_spirit::mArray description; + EXPECT_EQ(0, 
lrc.layers_description(parameters, &description, &cerr)); + EXPECT_EQ(0, lrc.layers_parse(description_string, description, &cerr)); + EXPECT_EQ(0, lrc.layers_init()); + EXPECT_EQ("5", lrc.layers.front().parameters["k"]); + EXPECT_EQ("2", lrc.layers.front().parameters["m"]); + EXPECT_EQ("jerasure", lrc.layers.front().parameters["plugin"]); + EXPECT_EQ("reed_sol_van", lrc.layers.front().parameters["technique"]); + } +} + +TEST(ErasureCodeLrc, init) +{ + ErasureCodeLrc lrc; + map parameters; + parameters["mapping"] = + "__DDD__DD"; + const char *description_string = + "[ " + " [ \"_cDDD_cDD\", \"\" ]," + " [ \"c_DDD____\", \"\" ]," + " [ \"_____cDDD\", \"\" ]," + "]"; + parameters["layers"] = description_string; + parameters["directory"] = ".libs"; + EXPECT_EQ(0, lrc.init(parameters, &cerr)); +} + +TEST(ErasureCodeLrc, init_kml) +{ + ErasureCodeLrc lrc; + map parameters; + parameters["k"] = "4"; + parameters["m"] = "2"; + parameters["l"] = "3"; + parameters["directory"] = ".libs"; + EXPECT_EQ(0, lrc.init(parameters, &cerr)); + EXPECT_EQ((unsigned int)(4 + 2 + (4 + 2) / 3), lrc.get_chunk_count()); +} + +TEST(ErasureCodeLrc, minimum_to_decode) +{ + // trivial : no erasures, the minimum is want_to_read + { + ErasureCodeLrc lrc; + map parameters; + parameters["mapping"] = + "__DDD__DD"; + const char *description_string = + "[ " + " [ \"_cDDD_cDD\", \"\" ]," + " [ \"c_DDD____\", \"\" ]," + " [ \"_____cDDD\", \"\" ]," + "]"; + parameters["layers"] = description_string; + parameters["directory"] = ".libs"; + EXPECT_EQ(0, lrc.init(parameters, &cerr)); + set want_to_read; + want_to_read.insert(1); + set available_chunks; + available_chunks.insert(1); + available_chunks.insert(2); + set minimum; + EXPECT_EQ(0, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + EXPECT_EQ(want_to_read, minimum); + } + // locally repairable erasure + { + ErasureCodeLrc lrc; + map parameters; + parameters["mapping"] = + "__DDD__DD_"; + const char *description_string = + "[ " + " [ 
\"_cDDD_cDD_\", \"\" ]," + " [ \"c_DDD_____\", \"\" ]," + " [ \"_____cDDD_\", \"\" ]," + " [ \"_____DDDDc\", \"\" ]," + "]"; + parameters["layers"] = description_string; + parameters["directory"] = ".libs"; + EXPECT_EQ(0, lrc.init(parameters, &cerr)); + EXPECT_EQ(parameters["mapping"].length(), + lrc.get_chunk_count()); + { + // want to read the last chunk + set want_to_read; + want_to_read.insert(lrc.get_chunk_count() - 1); + // all chunks are available except the last chunk + set available_chunks; + for (int i = 0; i < (int)lrc.get_chunk_count() - 1; i++) + available_chunks.insert(i); + // _____DDDDc can recover c + set minimum; + EXPECT_EQ(0, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + set expected_minimum; + expected_minimum.insert(5); + expected_minimum.insert(6); + expected_minimum.insert(7); + expected_minimum.insert(8); + EXPECT_EQ(expected_minimum, minimum); + } + { + set want_to_read; + want_to_read.insert(0); + set available_chunks; + for (int i = 1; i < (int)lrc.get_chunk_count(); i++) + available_chunks.insert(i); + set minimum; + EXPECT_EQ(0, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + set expected_minimum; + expected_minimum.insert(2); + expected_minimum.insert(3); + expected_minimum.insert(4); + EXPECT_EQ(expected_minimum, minimum); + } + } + // implicit parity required + { + ErasureCodeLrc lrc; + map parameters; + parameters["mapping"] = + "__DDD__DD"; + const char *description_string = + "[ " + " [ \"_cDDD_cDD\", \"\" ]," + " [ \"c_DDD____\", \"\" ]," + " [ \"_____cDDD\", \"\" ]," + "]"; + parameters["layers"] = description_string; + parameters["directory"] = ".libs"; + EXPECT_EQ(0, lrc.init(parameters, &cerr)); + EXPECT_EQ(parameters["mapping"].length(), + lrc.get_chunk_count()); + set want_to_read; + want_to_read.insert(8); + // + // unable to recover, too many chunks missing + // + { + set available_chunks; + available_chunks.insert(0); + available_chunks.insert(1); + // missing (2) + // missing 
(3) + available_chunks.insert(4); + available_chunks.insert(5); + available_chunks.insert(6); + // missing (7) + // missing (8) + set minimum; + EXPECT_EQ(-EIO, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + } + // + // We want to read chunk 8 and encoding was done with + // + // _cDDD_cDD + // c_DDD____ + // _____cDDD + // + // First strategy fails: + // + // 012345678 + // xxXXXxxXX initial chunks + // xx.XXxx.. missing (2, 7, 8) + // _____cDDD fail : can recover 1 but 2 are missing + // c_DDD____ ignored because 8 is not used (i.e. _) + // _cDDD_cDD fail : can recover 2 but 3 are missing + // + // Second strategy succeeds: + // + // 012345678 + // xxXXXxxXX initial chunks + // xx.XXxx.. missing (2, 7, 8) + // _____cDDD fail : can recover 1 but 2 are missing + // c_DDD____ success: recovers chunk 2 + // _cDDD_cDD success: recovers chunk 7, 8 + // + { + set available_chunks; + available_chunks.insert(0); + available_chunks.insert(1); + // missing (2) + available_chunks.insert(3); + available_chunks.insert(4); + available_chunks.insert(5); + available_chunks.insert(6); + // missing (7) + // missing (8) + set minimum; + EXPECT_EQ(0, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + EXPECT_EQ(available_chunks, minimum); + } + } +} + +TEST(ErasureCodeLrc, encode_decode) +{ + ErasureCodeLrc lrc; + map parameters; + parameters["mapping"] = + "__DD__DD"; + const char *description_string = + "[ " + " [ \"_cDD_cDD\", \"\" ]," // global layer + " [ \"c_DD____\", \"\" ]," // first local layer + " [ \"____cDDD\", \"\" ]," // second local layer + "]"; + parameters["layers"] = description_string; + parameters["directory"] = ".libs"; + EXPECT_EQ(0, lrc.init(parameters, &cerr)); + EXPECT_EQ(4U, lrc.get_data_chunk_count()); + unsigned int stripe_width = g_conf->osd_pool_erasure_code_stripe_width; + unsigned int chunk_size = stripe_width / lrc.get_data_chunk_count(); + EXPECT_EQ(chunk_size, lrc.get_chunk_size(stripe_width)); + set 
want_to_encode; + map encoded; + for (unsigned int i = 0; i < lrc.get_chunk_count(); ++i) { + want_to_encode.insert(i); + bufferptr ptr(buffer::create_page_aligned(chunk_size)); + encoded[i].push_front(ptr); + } + const vector &mapping = lrc.get_chunk_mapping(); + char c = 'A'; + for (unsigned int i = 0; i < lrc.get_data_chunk_count(); i++) { + int j = mapping[i]; + string s(chunk_size, c); + encoded[j].clear(); + encoded[j].append(s); + c++; + } + EXPECT_EQ(0, lrc.encode_chunks(want_to_encode, &encoded)); + + { + map chunks; + chunks[4] = encoded[4]; + chunks[5] = encoded[5]; + chunks[6] = encoded[6]; + set want_to_read; + want_to_read.insert(7); + set available_chunks; + available_chunks.insert(4); + available_chunks.insert(5); + available_chunks.insert(6); + set minimum; + EXPECT_EQ(0, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + // only need three chunks from the second local layer + EXPECT_EQ(3U, minimum.size()); + EXPECT_EQ(1U, minimum.count(4)); + EXPECT_EQ(1U, minimum.count(5)); + EXPECT_EQ(1U, minimum.count(6)); + map decoded; + EXPECT_EQ(0, lrc.decode(want_to_read, chunks, &decoded)); + string s(chunk_size, 'D'); + EXPECT_EQ(s, string(decoded[7].c_str(), chunk_size)); + } + { + set want_to_read; + want_to_read.insert(2); + map chunks; + chunks[1] = encoded[1]; + chunks[3] = encoded[3]; + chunks[5] = encoded[5]; + chunks[6] = encoded[6]; + chunks[7] = encoded[7]; + set available_chunks; + available_chunks.insert(1); + available_chunks.insert(3); + available_chunks.insert(5); + available_chunks.insert(6); + available_chunks.insert(7); + set minimum; + EXPECT_EQ(0, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + EXPECT_EQ(5U, minimum.size()); + EXPECT_EQ(available_chunks, minimum); + + map decoded; + EXPECT_EQ(0, lrc.decode(want_to_read, encoded, &decoded)); + string s(chunk_size, 'A'); + EXPECT_EQ(s, string(decoded[2].c_str(), chunk_size)); + } + { + set want_to_read; + want_to_read.insert(3); + 
want_to_read.insert(6); + want_to_read.insert(7); + set available_chunks; + available_chunks.insert(0); + available_chunks.insert(1); + available_chunks.insert(2); + // available_chunks.insert(3); + available_chunks.insert(4); + available_chunks.insert(5); + // available_chunks.insert(6); + // available_chunks.insert(7); + encoded.erase(3); + encoded.erase(6); + set minimum; + EXPECT_EQ(0, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + EXPECT_EQ(4U, minimum.size()); + // only need two chunks from the first local layer + EXPECT_EQ(1U, minimum.count(0)); + EXPECT_EQ(1U, minimum.count(2)); + // the above chunks will rebuild chunk 3 and the global layer only needs + // three more chunks to reach the required amount of chunks (4) to recover + // the last two + EXPECT_EQ(1U, minimum.count(1)); + EXPECT_EQ(1U, minimum.count(2)); + EXPECT_EQ(1U, minimum.count(5)); + + map decoded; + EXPECT_EQ(0, lrc.decode(want_to_read, encoded, &decoded)); + { + string s(chunk_size, 'B'); + EXPECT_EQ(s, string(decoded[3].c_str(), chunk_size)); + } + { + string s(chunk_size, 'C'); + EXPECT_EQ(s, string(decoded[6].c_str(), chunk_size)); + } + { + string s(chunk_size, 'D'); + EXPECT_EQ(s, string(decoded[7].c_str(), chunk_size)); + } + } +} + +TEST(ErasureCodeLrc, encode_decode_2) +{ + ErasureCodeLrc lrc; + map parameters; + parameters["mapping"] = + "DD__DD__"; + const char *description_string = + "[ " + " [ \"DDc_DDc_\", \"\" ]," + " [ \"DDDc____\", \"\" ]," + " [ \"____DDDc\", \"\" ]," + "]"; + parameters["layers"] = description_string; + parameters["directory"] = ".libs"; + EXPECT_EQ(0, lrc.init(parameters, &cerr)); + EXPECT_EQ(4U, lrc.get_data_chunk_count()); + unsigned int stripe_width = g_conf->osd_pool_erasure_code_stripe_width; + unsigned int chunk_size = stripe_width / lrc.get_data_chunk_count(); + EXPECT_EQ(chunk_size, lrc.get_chunk_size(stripe_width)); + set want_to_encode; + map encoded; + for (unsigned int i = 0; i < lrc.get_chunk_count(); ++i) { + 
want_to_encode.insert(i); + bufferptr ptr(buffer::create_page_aligned(chunk_size)); + encoded[i].push_front(ptr); + } + const vector &mapping = lrc.get_chunk_mapping(); + char c = 'A'; + for (unsigned int i = 0; i < lrc.get_data_chunk_count(); i++) { + int j = mapping[i]; + string s(chunk_size, c); + encoded[j].clear(); + encoded[j].append(s); + c++; + } + EXPECT_EQ(0, lrc.encode_chunks(want_to_encode, &encoded)); + + { + set want_to_read; + want_to_read.insert(0); + map chunks; + chunks[1] = encoded[1]; + chunks[3] = encoded[3]; + chunks[4] = encoded[4]; + chunks[5] = encoded[5]; + chunks[6] = encoded[6]; + chunks[7] = encoded[7]; + set available_chunks; + available_chunks.insert(1); + available_chunks.insert(3); + available_chunks.insert(4); + available_chunks.insert(5); + available_chunks.insert(6); + available_chunks.insert(7); + set minimum; + EXPECT_EQ(0, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + EXPECT_EQ(4U, minimum.size()); + EXPECT_EQ(1U, minimum.count(1)); + EXPECT_EQ(1U, minimum.count(4)); + EXPECT_EQ(1U, minimum.count(5)); + EXPECT_EQ(1U, minimum.count(6)); + + map decoded; + EXPECT_EQ(0, lrc.decode(want_to_read, chunks, &decoded)); + string s(chunk_size, 'A'); + EXPECT_EQ(s, string(decoded[0].c_str(), chunk_size)); + } + { + set want_to_read; + for (unsigned int i = 0; i < lrc.get_chunk_count(); i++) + want_to_read.insert(i); + map chunks; + chunks[1] = encoded[1]; + chunks[3] = encoded[3]; + chunks[5] = encoded[5]; + chunks[6] = encoded[6]; + chunks[7] = encoded[7]; + set available_chunks; + available_chunks.insert(1); + available_chunks.insert(3); + available_chunks.insert(5); + available_chunks.insert(6); + available_chunks.insert(7); + set minimum; + EXPECT_EQ(0, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + EXPECT_EQ(5U, minimum.size()); + EXPECT_EQ(1U, minimum.count(1)); + EXPECT_EQ(1U, minimum.count(3)); + EXPECT_EQ(1U, minimum.count(5)); + EXPECT_EQ(1U, minimum.count(6)); + EXPECT_EQ(1U, 
minimum.count(7)); + + map decoded; + EXPECT_EQ(0, lrc.decode(want_to_read, chunks, &decoded)); + { + string s(chunk_size, 'A'); + EXPECT_EQ(s, string(decoded[0].c_str(), chunk_size)); + } + { + string s(chunk_size, 'B'); + EXPECT_EQ(s, string(decoded[1].c_str(), chunk_size)); + } + { + string s(chunk_size, 'C'); + EXPECT_EQ(s, string(decoded[4].c_str(), chunk_size)); + } + { + string s(chunk_size, 'D'); + EXPECT_EQ(s, string(decoded[5].c_str(), chunk_size)); + } + } + { + set want_to_read; + for (unsigned int i = 0; i < lrc.get_chunk_count(); i++) + want_to_read.insert(i); + map chunks; + chunks[1] = encoded[1]; + chunks[3] = encoded[3]; + chunks[5] = encoded[5]; + chunks[6] = encoded[6]; + chunks[7] = encoded[7]; + set available_chunks; + available_chunks.insert(1); + available_chunks.insert(3); + available_chunks.insert(5); + available_chunks.insert(6); + available_chunks.insert(7); + set minimum; + EXPECT_EQ(0, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + EXPECT_EQ(5U, minimum.size()); + EXPECT_EQ(1U, minimum.count(1)); + EXPECT_EQ(1U, minimum.count(3)); + EXPECT_EQ(1U, minimum.count(5)); + EXPECT_EQ(1U, minimum.count(6)); + EXPECT_EQ(1U, minimum.count(7)); + + map decoded; + EXPECT_EQ(0, lrc.decode(want_to_read, chunks, &decoded)); + { + string s(chunk_size, 'A'); + EXPECT_EQ(s, string(decoded[0].c_str(), chunk_size)); + } + { + string s(chunk_size, 'B'); + EXPECT_EQ(s, string(decoded[1].c_str(), chunk_size)); + } + { + string s(chunk_size, 'C'); + EXPECT_EQ(s, string(decoded[4].c_str(), chunk_size)); + } + { + string s(chunk_size, 'D'); + EXPECT_EQ(s, string(decoded[5].c_str(), chunk_size)); + } + } + { + set want_to_read; + want_to_read.insert(6); + map chunks; + chunks[0] = encoded[0]; + chunks[1] = encoded[1]; + chunks[3] = encoded[3]; + chunks[5] = encoded[5]; + chunks[7] = encoded[7]; + set available_chunks; + available_chunks.insert(0); + available_chunks.insert(1); + available_chunks.insert(3); + available_chunks.insert(5); + 
available_chunks.insert(7); + set minimum; + EXPECT_EQ(0, lrc.minimum_to_decode(want_to_read, available_chunks, &minimum)); + EXPECT_EQ(available_chunks, minimum); + + map decoded; + EXPECT_EQ(0, lrc.decode(want_to_read, chunks, &decoded)); + } +} + +int main(int argc, char **argv) +{ + vector args; + argv_to_vec(argc, (const char **)argv, args); + + global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + common_init_finish(g_ceph_context); + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +/* + * Local Variables: + * compile-command: "cd ../.. ; + * make -j4 && valgrind --tool=memcheck --leak-check=full \ + * ./unittest_erasure_code_lrc \ + * --gtest_filter=*.* --log-to-stderr=true --debug-osd=20" + * End: + */ diff --git a/src/test/erasure-code/TestErasureCodePluginLRC.cc b/src/test/erasure-code/TestErasureCodePluginLRC.cc deleted file mode 100644 index d2f300df77b5..000000000000 --- a/src/test/erasure-code/TestErasureCodePluginLRC.cc +++ /dev/null @@ -1,58 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph distributed storage system - * - * Copyright (C) 2014 Cloudwatt - * - * Author: Loic Dachary - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - */ - -#include -#include "arch/probe.h" -#include "arch/intel.h" -#include "global/global_init.h" -#include "erasure-code/ErasureCodePlugin.h" -#include "common/ceph_argparse.h" -#include "global/global_context.h" -#include "gtest/gtest.h" - -TEST(ErasureCodePlugin, factory) -{ - ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance(); - map parameters; - parameters["directory"] = ".libs"; - parameters["mapping"] = "DD_"; - parameters["layers"] = "[ [ \"DDc\", \"\" ] ]"; - ErasureCodeInterfaceRef erasure_code; - EXPECT_FALSE(erasure_code); - EXPECT_EQ(0, instance.factory("LRC", parameters, &erasure_code, cerr)); - EXPECT_TRUE(erasure_code); -} - -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - -/* - * Local Variables: - * compile-command: "cd ../.. ; make -j4 && - * make unittest_erasure_code_plugin_LRC && - * valgrind --tool=memcheck ./unittest_erasure_code_plugin_LRC \ - * --gtest_filter=*.* --log-to-stderr=true --debug-osd=20" - * End: - */ diff --git a/src/test/erasure-code/TestErasureCodePluginLrc.cc b/src/test/erasure-code/TestErasureCodePluginLrc.cc new file mode 100644 index 000000000000..9376d9b4a834 --- /dev/null +++ b/src/test/erasure-code/TestErasureCodePluginLrc.cc @@ -0,0 +1,59 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph distributed storage system + * + * Copyright (C) 2014 Cloudwatt + * Copyright (C) 2014 Red Hat + * + * Author: Loic Dachary + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include "arch/probe.h" +#include "arch/intel.h" +#include "global/global_init.h" +#include "erasure-code/ErasureCodePlugin.h" +#include "common/ceph_argparse.h" +#include "global/global_context.h" +#include "gtest/gtest.h" + +TEST(ErasureCodePlugin, factory) +{ + ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance(); + map parameters; + parameters["directory"] = ".libs"; + parameters["mapping"] = "DD_"; + parameters["layers"] = "[ [ \"DDc\", \"\" ] ]"; + ErasureCodeInterfaceRef erasure_code; + EXPECT_FALSE(erasure_code); + EXPECT_EQ(0, instance.factory("lrc", parameters, &erasure_code, cerr)); + EXPECT_TRUE(erasure_code); +} + +int main(int argc, char **argv) +{ + vector args; + argv_to_vec(argc, (const char **)argv, args); + + global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + common_init_finish(g_ceph_context); + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +/* + * Local Variables: + * compile-command: "cd ../.. 
; make -j4 && + * make unittest_erasure_code_plugin_lrc && + * valgrind --tool=memcheck ./unittest_erasure_code_plugin_lrc \ + * --gtest_filter=*.* --log-to-stderr=true --debug-osd=20" + * End: + */ diff --git a/src/test/erasure-code/test-erasure-code.sh b/src/test/erasure-code/test-erasure-code.sh index 50ba71dce19e..6f31673c1c99 100755 --- a/src/test/erasure-code/test-erasure-code.sh +++ b/src/test/erasure-code/test-erasure-code.sh @@ -111,13 +111,13 @@ function plugin_exists() { return $status } -function TEST_rados_put_get_LRC_advanced() { +function TEST_rados_put_get_lrc_advanced() { local dir=$1 - local poolname=pool-LRC - local profile=profile-LRC + local poolname=pool-lrc + local profile=profile-lrc ./ceph osd erasure-code-profile set $profile \ - plugin=LRC \ + plugin=lrc \ mapping=DD_ \ ruleset-steps='[ [ "chooseleaf", "osd", 0 ] ]' \ layers='[ [ "DDc", "" ] ]' || return 1 @@ -130,13 +130,13 @@ function TEST_rados_put_get_LRC_advanced() { ./ceph osd erasure-code-profile rm $profile } -function TEST_rados_put_get_LRC_kml() { +function TEST_rados_put_get_lrc_kml() { local dir=$1 - local poolname=pool-LRC - local profile=profile-LRC + local poolname=pool-lrc + local profile=profile-lrc ./ceph osd erasure-code-profile set $profile \ - plugin=LRC \ + plugin=lrc \ k=4 m=2 l=3 \ ruleset-failure-domain=osd || return 1 ./ceph osd pool create $poolname 12 12 erasure $profile \ @@ -259,7 +259,7 @@ function TEST_chunk_mapping() { verify_chunk_mapping $dir ecpool 0 1 || return 1 ./ceph osd erasure-code-profile set remap-profile \ - plugin=LRC \ + plugin=lrc \ layers='[ [ "_DD", "" ] ]' \ mapping='_DD' \ ruleset-steps='[ [ "choose", "osd", 0 ] ]' || return 1 diff --git a/src/test/mon/osd-pool-create.sh b/src/test/mon/osd-pool-create.sh index 311fc2b8c456..b73cd5d44645 100755 --- a/src/test/mon/osd-pool-create.sh +++ b/src/test/mon/osd-pool-create.sh @@ -214,12 +214,12 @@ function TEST_replicated_pool_with_ruleset() { grep "doesn't exist" || return 1 } -function 
TEST_erasure_code_pool_LRC() { +function TEST_erasure_code_pool_lrc() { local dir=$1 run_mon $dir a --public-addr 127.0.0.1 ./ceph osd erasure-code-profile set LRCprofile \ - plugin=LRC \ + plugin=lrc \ mapping=DD_ \ layers='[ [ "DDc", "" ] ]' || return 1